From ace6b1ff3018fedcd2945226ccea58192a83be18 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 24 Sep 2024 15:04:44 +0100 Subject: [PATCH 1/5] lisa.trace: Support polars 1.8.1 --- lisa/trace.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lisa/trace.py b/lisa/trace.py index 5c27f66cb..bf0110ec5 100644 --- a/lisa/trace.py +++ b/lisa/trace.py @@ -283,10 +283,14 @@ def _logical_plan_update_paths(plan, update_path): else: return paths + # The location is based on the version of polars, since the + # JSON format is unstable. locs = [ ['paths'], - # Since polars 1.7.0, paths are stored in a new location - ['sources', 'sources', 'Paths'] + # 1.8.1 + ['sources', 'Paths'], + # 1.7.0 + ['sources', 'sources', 'Paths'], ] for loc in locs: try: -- GitLab From a9d5d2262328f9c97d66b545f3fdc5fe56f62f10 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 24 Sep 2024 15:06:08 +0100 Subject: [PATCH 2/5] setup.py: Adjust polars bounds --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 07f03ee9c..7e98f7de3 100755 --- a/setup.py +++ b/setup.py @@ -137,8 +137,8 @@ if __name__ == "__main__": "panel", "colorcet", # Avoid: - # polars 1.7.0: https://github.com/pola-rs/polars/issues/18719 - "polars >= 1.0.0, < 2.0.0, < 1.7.0", + # polars 1.7.0, 1.7.1: https://github.com/pola-rs/polars/issues/18719 + "polars >= 1.0.0, < 2.0.0, != 1.7.0, != 1.7.1", # Pandas >= 1.0.0 has support for new nullable dtypes # Pandas 1.2.0 has broken barplots: # https://github.com/pandas-dev/pandas/issues/38947 -- GitLab From 1eb5177922f97550258d036992dad3ad4e4ca729 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 26 Nov 2024 10:41:52 +0000 Subject: [PATCH 3/5] lisa.analysis.tasks: Support polars 1.15.0 FIX Polars 1.15.0 got a change in behavior when there is a single column for pl.DataFrame.rows_by_key(): https://github.com/pola-rs/polars/issues/19994 --- lisa/analysis/tasks.py | 6 +++++- setup.py | 4 
+++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lisa/analysis/tasks.py b/lisa/analysis/tasks.py index d8328eefd..86f8036e3 100644 --- a/lisa/analysis/tasks.py +++ b/lisa/analysis/tasks.py @@ -320,8 +320,12 @@ class TasksAnalysis(TraceAnalysisBase): df = df.collect() def finalize(df, key_col): + assert len(df.columns) == 2 # Aggregate the values for each key and convert to python types - return dict(df.rows_by_key(key_col)) + return { + key: [x[0] for x in values] + for key, values in df.rows_by_key(key_col).items() + } name_to_pid = finalize(df, 'name') pid_to_name = finalize(df, 'pid') diff --git a/setup.py b/setup.py index 7e98f7de3..24df43668 100755 --- a/setup.py +++ b/setup.py @@ -138,7 +138,9 @@ if __name__ == "__main__": "colorcet", # Avoid: # polars 1.7.0, 1.7.1: https://github.com/pola-rs/polars/issues/18719 - "polars >= 1.0.0, < 2.0.0, != 1.7.0, != 1.7.1", + # Require: + # polars 1.15.0: https://github.com/pola-rs/polars/issues/19994 + "polars >= 1.15.0, < 2.0.0", # Pandas >= 1.0.0 has support for new nullable dtypes # Pandas 1.2.0 has broken barplots: # https://github.com/pandas-dev/pandas/issues/38947 -- GitLab From fd92a56fb6d5aa6a791983c7707a1d3c836a468e Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 26 Nov 2024 10:59:45 +0000 Subject: [PATCH 4/5] tests: Fix test_get_task_id FIX Fix the test checking that a given comm or pid can only be resolved by TasksAnalysis.tasks.get_task_id() if it is unambiguously matching a single (pid, comm) tuple. The test checks that the "sh" comm is ambiguous, as lots of processes start their life with the "sh" name (because of shell forking, and the child is only renamed in a later phase). However, a given PID might only ever have the "sh" name if it was indeed a shell process. The same applies to sshd processes. Therefore, remove the PIDs that can successfully be paired with a single comm, and make sure we test the entire list, not just until the first exception is raised. 
--- tests/test_trace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_trace.py b/tests/test_trace.py index 78699612a..e6ed30ee0 100644 --- a/tests/test_trace.py +++ b/tests/test_trace.py @@ -154,8 +154,8 @@ class TraceTestCase(StorageTestCase): for x in (pid, name, task_id, task_id2, task_id3, task_id_tuple): assert self.trace.ana.tasks.get_task_id(x) == task_id - with pytest.raises(ValueError): - for x in ('sh', 'sshd', 1639, 1642, 1702, 1717, 1718): + for x in ('sh', 'sshd'): + with pytest.raises(ValueError): self.trace.ana.tasks.get_task_id(x) def test_get_task_name_pids(self): -- GitLab From 35550ddffdb94adc51436f58b5545c18561ab12d Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Wed, 27 Nov 2024 17:37:46 +0000 Subject: [PATCH 5/5] lisa: Silence polars fork warning FIX Avoid polars fork warning as devlib triggers it a lot due to its use of subprocess.Popen(preexec_fn=...) https://github.com/pola-rs/polars/issues/20000 --- lisa/__init__.py | 4 ++++ setup.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lisa/__init__.py b/lisa/__init__.py index 2990da167..ccb42e037 100644 --- a/lisa/__init__.py +++ b/lisa/__init__.py @@ -40,4 +40,8 @@ warnings.filterwarnings( module=r'__main__', ) +# Work around the warnings reported here: +# https://github.com/pola-rs/polars/issues/20000 +os.environ.setdefault('POLARS_ALLOW_FORKING_THREAD', '1') + # vim :set tabstop=4 shiftwidth=4 textwidth=80 expandtab diff --git a/setup.py b/setup.py index 24df43668..4ee661f2c 100755 --- a/setup.py +++ b/setup.py @@ -139,8 +139,9 @@ if __name__ == "__main__": # Avoid: # polars 1.7.0, 1.7.1: https://github.com/pola-rs/polars/issues/18719 # Require: - # polars 1.15.0: https://github.com/pola-rs/polars/issues/19994 - "polars >= 1.15.0, < 2.0.0", + # polars >= 1.15.0: https://github.com/pola-rs/polars/issues/19994 + # polars >= 1.16.0: https://github.com/pola-rs/polars/issues/20000 + "polars >= 1.16.0, < 2.0.0", # Pandas >= 1.0.0 
has support for new nullable dtypes # Pandas 1.2.0 has broken barplots: # https://github.com/pandas-dev/pandas/issues/38947 -- GitLab