From 86722c1337812e6d201cd68f2dcbbb057bee8981 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Mon, 24 Feb 2025 15:25:44 +0000 Subject: [PATCH 1/2] lisa.analysis.status: Fix use of pl.Expr.shift(fill_value=...) FIX pl.Expr.shift(fill_value=...) parameter takes a scalar value rather than a column. Ensure we provide one rather than the other. --- lisa/analysis/status.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lisa/analysis/status.py b/lisa/analysis/status.py index d1679869d..72b9e5ca5 100644 --- a/lisa/analysis/status.py +++ b/lisa/analysis/status.py @@ -65,7 +65,7 @@ class StatusAnalysis(TraceAnalysisBase): 1, # We want to select the first row, so make sure the filter # evaluates to true at that index. - fill_value=pl.col('overutilized').not_(), + fill_value=pl.col('overutilized').not_().first(), ) ) df = df_refit_index(df, window=trace.window) -- GitLab From 4bc2cc223a7d9f8af6d7b1c00663516ee7e4a2ba Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Mon, 24 Feb 2025 16:16:06 +0000 Subject: [PATCH 2/2] lisa.trace: Hide polars streaming engine DeprecationWarning FIX Hide the DeprecationWarning from polars about the streaming engine and ensure the fallback on the non-streaming engine triggers transparently at the first problem. --- lisa/trace.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lisa/trace.py b/lisa/trace.py index 3820c537d..801d9c602 100644 --- a/lisa/trace.py +++ b/lisa/trace.py @@ -4552,9 +4552,15 @@ class _TraceCache(Loggable): elif isinstance(data, pl.LazyFrame): with pl.StringCache(): try: - data.sink_parquet(path, **kwargs) - # Some LazyFrame cannot be sunk lazily to a parquet file - except polars.exceptions.InvalidOperationError: + # TOOD: revisit when polars streaming engine is complete + # and it does not raise a DeprecationWarning anymore. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=DeprecationWarning) + data.sink_parquet(path, **kwargs) + # The streaming engine may have issues with some LazyFrames, so + # fall back on collecting. + except Exception: + path.unlink(missing_ok=True) data.collect().write_parquet(path, **kwargs) else: data.to_parquet(path, **kwargs) @@ -4697,7 +4703,7 @@ class _TraceCache(Loggable): except KeyError: swap_entry = _CacheDataSwapEntry(cache_desc_nf) - data_path = os.path.join(swap_dir, swap_entry.data_filename) + data_path = Path(swap_dir, swap_entry.data_filename) # If that would make the swap dir too large, try to do some cleanup if self._estimate_data_swap_size(data) + self._swap_size > self.max_swap_size: @@ -4737,7 +4743,7 @@ class _TraceCache(Loggable): self._swap_content[swap_entry.cache_desc_nf] = swap_entry try: - data_swapped_size = os.stat(data_path).st_size + data_swapped_size = data_path.stat().st_size except FileNotFoundError: data_swapped_size = 0 -- GitLab