From 94afef422dd5e1b9f92aee3879138a98fb6421cb Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Fri, 31 Jan 2025 17:29:03 +0000
Subject: [PATCH 1/7] tools/lisa-plot: Ensure we never silently consume an exception

FIX

Ensure we always display something in case of exception, even if the
exception has no message.
---
 lisa/_cli_tools/lisa_plot.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lisa/_cli_tools/lisa_plot.py b/lisa/_cli_tools/lisa_plot.py
index 90ad3b8b9..132df2de1 100755
--- a/lisa/_cli_tools/lisa_plot.py
+++ b/lisa/_cli_tools/lisa_plot.py
@@ -147,7 +147,11 @@ def handle_plot_excep(exit_on_error=True):
         else:
             excep_msg = 'Please specify --plat-info with the "{}" filled in'.format(e.args[1])
     except Exception as e:
-        excep_msg = str(e)
+        msg = str(e)
+        if msg:
+            excep_msg = msg
+        else:
+            excep_msg = e.__class__.__qualname__
     else:
         excep_msg = None
--
GitLab

From 580ba6ddc0f8047abc454c3d1697bf442a7d174c Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Fri, 31 Jan 2025 16:17:51 +0000
Subject: [PATCH 2/7] lisa.analysis.status: Make StatusAnalysis.plot_overutilized() a lot faster

FEATURE

Use VSpans instead of VSpan to only create a single object to display,
leading to major speedups.
---
 lisa/analysis/status.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/lisa/analysis/status.py b/lisa/analysis/status.py
index 5a01ea845..f1f56ca0b 100644
--- a/lisa/analysis/status.py
+++ b/lisa/analysis/status.py
@@ -89,23 +89,17 @@ class StatusAnalysis(TraceAnalysisBase):
         df = self.df_overutilized()
         if not df.empty:
             df = df_refit_index(df, window=self.trace.window)
+            df = df[df['overutilized'] != 0]
+            df = df[['len']].reset_index()

             # Compute intervals in which the system is reported to be overutilized
-            return hv.Overlay(
-                [
-                    hv.VSpan(
-                        start,
-                        start + delta,
-                        label='Overutilized'
-                    ).options(
-                        color='red',
-                        alpha=0.05,
-                    )
-                    for start, delta, overutilized in df[['len', 'overutilized']].itertuples()
-                    if overutilized
-                ]
+            return hv.VSpans(
+                (df['Time'], df['Time'] + df['len']),
+                label='Overutilized'
             ).options(
-                title='System-wide overutilized status'
+                color='red',
+                alpha=0.05,
+                title='System-wide overutilized status',
             )
         else:
             return _hv_neutral()
--
GitLab
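Note: to see why the change above (and the identical one in the next patch) helps, the sketch below builds the same overutilization bands both ways, one hv.VSpan per interval versus a single hv.VSpans. It is only an illustration: the interval values are made up, and it assumes a HoloViews release recent enough to provide hv.VSpans, with the bokeh backend installed.

    import holoviews as hv
    import pandas as pd

    hv.extension('bokeh')

    # Made-up overutilization intervals: start times and durations in seconds.
    starts = pd.Series([0.0, 2.5, 6.0])
    lens = pd.Series([0.5, 1.0, 0.25])

    # Old approach: one hv.VSpan element per interval, composed into an
    # hv.Overlay, so every interval becomes a separate object to render.
    per_interval = hv.Overlay([
        hv.VSpan(start, start + length, label='Overutilized')
        for start, length in zip(starts, lens)
    ])

    # New approach: a single hv.VSpans element carrying all the intervals,
    # built from (starts, ends) columns like the patched code does.
    batched = hv.VSpans((starts, starts + lens), label='Overutilized').options(
        color='red',
        alpha=0.05,
    )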
From 79144a98f4109008aa055714da5d85cbc23d235d Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Fri, 31 Jan 2025 16:29:25 +0000
Subject: [PATCH 3/7] lisa.analysis.latency: Make LatencyAnalysis.plot_latency_bands() a lot faster

FEATURE

Use VSpans instead of VSpan to only create a single object to display,
leading to major speedups.
---
 lisa/analysis/latency.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py
index f4a96649e..69a32f800 100644
--- a/lisa/analysis/latency.py
+++ b/lisa/analysis/latency.py
@@ -406,20 +406,14 @@ class LatencyAnalysis(TraceAnalysisBase):
             df = df_refit_index(df, window=self.trace.window)
             if df.empty:
                 return _hv_neutral()
-
-            return hv.Overlay(
-                [
-                    hv.VSpan(
-                        start,
-                        start + duration,
-                        label=label,
-                    ).options(
-                        alpha=0.5,
-                    )
-                    for start, duration in df[[column]].itertuples()
-                ]
-            )
-
+            else:
+                df = df[[column]].reset_index()
+                return hv.VSpans(
+                    (df['Time'], df['Time'] + df[column]),
+                    label=label,
+                ).options(
+                    alpha=0.5,
+                )

         return (
             plot_bands(wkl_df, "wakeup_latency", "Wakeup latencies") *
             plot_bands(prt_df, "preempt_latency", "Preemption latencies")
--
GitLab

From 100914c9bbbb8e374bad5c8939cb4da077cca44d Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Fri, 31 Jan 2025 16:46:42 +0000
Subject: [PATCH 4/7] lisa.analysis.frequency: Add plot_cpu_frequencies(overutilized=True) parameter

FEATURE

Allow controlling whether plot_cpu_frequencies() displays the
overutilized state or not.
---
 lisa/analysis/frequency.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py
index 8a056baf7..eaa3e5cd6 100644
--- a/lisa/analysis/frequency.py
+++ b/lisa/analysis/frequency.py
@@ -391,7 +391,7 @@ class FrequencyAnalysis(TraceAnalysisBase):

     @TraceAnalysisBase.plot_method
     @df_cpu_frequency.used_events
-    def plot_cpu_frequencies(self, cpu: CPU, average: bool=True):
+    def plot_cpu_frequencies(self, cpu: CPU, average: bool=True, overutilized: bool=True):
         """
         Plot frequency for the specified CPU

@@ -402,6 +402,9 @@ class FrequencyAnalysis(TraceAnalysisBase):
             frequency average.
         :type average: bool

+        :param overutilized: If ``True``, add the overutilized state as an overlay.
+        :type overutilized: bool
+
         If ``sched_overutilized`` events are available, the plots will also
         show the intervals of time where the system was overutilized.
         """
@@ -425,9 +428,10 @@ class FrequencyAnalysis(TraceAnalysisBase):
         if average and avg > 0:
             fig *= hv.HLine(avg, group='average').opts(color='red')

-        plot_overutilized = self.ana.status.plot_overutilized
-        if self.trace.has_events(plot_overutilized.used_events):
-            fig *= plot_overutilized()
+        if overutilized:
+            plot_overutilized = self.ana.status.plot_overutilized
+            if self.trace.has_events(plot_overutilized.used_events):
+                fig *= plot_overutilized()

         return fig
--
GitLab
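Note: a minimal usage sketch for the new overutilized parameter added above; `trace` is assumed to be an existing lisa.trace.Trace with the relevant events parsed.

    # Default behaviour is unchanged: overutilized bands are overlaid whenever
    # sched_overutilized events are present in the trace.
    fig = trace.ana.frequency.plot_cpu_frequencies(cpu=0)

    # Opt out of the overlay, e.g. to keep a busy plot readable.
    fig = trace.ana.frequency.plot_cpu_frequencies(cpu=0, overutilized=False)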
""" logger = self.logger - df = self.df_cpu_frequency(cpu) + df = self.df_cpu_frequency(cpu, df_fmt='polars-lazyframe') if "freqs" in self.trace.plat_info: frequencies = self.trace.plat_info['freqs'][cpu] else: logger.info(f"Estimating CPU{cpu} frequencies from trace") - frequencies = sorted(list(df.frequency.unique())) + frequencies = sorted( + df.select(pl.col('frequency').unique()).collect()['frequency'].to_list() + ) logger.debug(f"Estimated frequencies: {frequencies}") avg = self.get_average_cpu_frequency(cpu) @@ -423,7 +425,10 @@ class FrequencyAnalysis(TraceAnalysisBase): "Average frequency for CPU{} : {:.3f} GHz".format(cpu, avg / 1e6)) df = df_refit_index(df, window=self.trace.window) - fig = plot_signal(df['frequency'], name=f'Frequency of CPU{cpu} (Hz)') + fig = plot_signal( + df.select(('Time', 'frequency')), + name=f'Frequency of CPU{cpu} (Hz)', + ) if average and avg > 0: fig *= hv.HLine(avg, group='average').opts(color='red') -- GitLab From 32756153473767954712dbbcabe6c2b15710a1d6 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Fri, 31 Jan 2025 17:25:17 +0000 Subject: [PATCH 6/7] lisa.analysis.status: Convert StatusAnalysis to polars FEATURE Convert the whole analysis to polars to avoid conversion overheads. --- lisa/analysis/status.py | 76 ++++++++++++++++++++++++++--------------- tests/test_trace.py | 8 +++-- 2 files changed, 54 insertions(+), 30 deletions(-) diff --git a/lisa/analysis/status.py b/lisa/analysis/status.py index f1f56ca0b..d1679869d 100644 --- a/lisa/analysis/status.py +++ b/lisa/analysis/status.py @@ -20,10 +20,11 @@ """ System Status Analaysis Module """ import holoviews as hv +import polars as pl from lisa.analysis.base import TraceAnalysisBase from lisa.trace import requires_events -from lisa.datautils import df_refit_index, df_add_delta, df_deduplicate +from lisa.datautils import df_refit_index, df_add_delta, df_deduplicate, _df_to from lisa.notebook import _hv_neutral @@ -52,29 +53,48 @@ class StatusAnalysis(TraceAnalysisBase): * A ``overutilized`` column (the overutilized status at a given time) * A ``len`` column (the time spent in that overutilized status) """ + trace = self.trace.get_view(df_fmt='polars-lazyframe') # Build sequence of overutilization "bands" - df = self.trace.df_event('sched_overutilized') + df = trace.df_event('sched_overutilized') + # Deduplicate before calling df_refit_index() since it will likely add + # a row with duplicated state to have the expected window end + # timestamp. + df = df.filter( + pl.col('overutilized') != + pl.col('overutilized').shift( + 1, + # We want to select the first row, so make sure the filter + # evaluates to true at that index. + fill_value=pl.col('overutilized').not_(), + ) + ) + df = df_refit_index(df, window=trace.window) + # There might be a race between multiple CPUs to emit the # sched_overutilized event, so get rid of duplicated events - df = df_deduplicate(df, cols=['overutilized'], keep='first', consecutives=True) - df = df_add_delta(df, col='len', window=self.trace.window) - # Ignore the last line added by df_refit_index() with a NaN len - df = df.iloc[:-1] - return df[['len', 'overutilized']] + df = df.with_columns( + overutilized=pl.col('overutilized').cast(pl.Boolean), + len=pl.col('Time').diff().shift(-1), + ) + return df.select(('Time', 'overutilized', 'len')) def get_overutilized_time(self): """ Return the time spent in overutilized state. 
""" - df = self.df_overutilized() - return df[df['overutilized'] == 1]['len'].sum() + df = self.df_overutilized(df_fmt='polars-lazyframe') + df = df.filter(pl.col('overutilized')) + duration = df.select( + pl.col('len').dt.total_nanoseconds().sum() / 1e9 + ).collect().item() + return float(duration) def get_overutilized_pct(self): """ The percentage of the time spent in overutilized state. """ ou_time = self.get_overutilized_time() - return 100 * ou_time / self.trace.time_range + return float(100 * ou_time / self.trace.time_range) ############################################################################### # Plotting Methods @@ -86,22 +106,24 @@ class StatusAnalysis(TraceAnalysisBase): """ Draw the system's overutilized status as colored bands """ - df = self.df_overutilized() - if not df.empty: - df = df_refit_index(df, window=self.trace.window) - df = df[df['overutilized'] != 0] - df = df[['len']].reset_index() - - # Compute intervals in which the system is reported to be overutilized - return hv.VSpans( - (df['Time'], df['Time'] + df['len']), - label='Overutilized' - ).options( - color='red', - alpha=0.05, - title='System-wide overutilized status', - ) - else: - return _hv_neutral() + df = self.df_overutilized(df_fmt='polars-lazyframe') + + df = df.filter(pl.col('overutilized')) + df = df.select( + pl.col('Time'), + (pl.col('Time') + pl.col('len')).alias('width'), + ) + df = _df_to(df, fmt='pandas') + df.reset_index(inplace=True) + + # Compute intervals in which the system is reported to be overutilized + return hv.VSpans( + (df['Time'], df['width']), + label='Overutilized' + ).options( + color='red', + alpha=0.05, + title='System-wide overutilized status', + ) # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/tests/test_trace.py b/tests/test_trace.py index e6ed30ee0..9b1c96aa4 100644 --- a/tests/test_trace.py +++ b/tests/test_trace.py @@ -406,16 +406,18 @@ class TestTraceView(TraceTestCase): def test_lower_slice(self): view = self.trace[81:] - assert len(view.ana.status.df_overutilized()) == 2 + df = view.ana.status.df_overutilized() + assert len(df) == 3 def test_upper_slice(self): view = self.trace[:80.402065] df = view.ana.status.df_overutilized() - assert len(view.ana.status.df_overutilized()) == 1 + assert len(df) == 2 def test_full_slice(self): view = self.trace[80:81] - assert len(view.ana.status.df_overutilized()) == 2 + df = view.ana.status.df_overutilized() + assert len(df) == 3 def test_time_range(self): expected_duration = np.nextafter(4.0, math.inf) -- GitLab From 5ce348f3aeae957ed6afe629489bd0782034aefe Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Mon, 3 Feb 2025 13:46:12 +0000 Subject: [PATCH 7/7] lisa.datautils: Remove df_refit_index(method=...) parameter BREAKING CHANGE Remove the "method" parameter and make the function effectively match the behavior of passing method='pre'. The parameter was never needed in practice and made implementing a streaming version harder for no benefit. Changing the behavior from "inclusive" to "pre" means the last row added by df_refit_index() will not be reflecting the future value of the signal but instead the actual current value of the signal at that timestamp. This should have very little effect on real-world computations, as the last row is usually not really used for anything else than its timestamp. 
From 5ce348f3aeae957ed6afe629489bd0782034aefe Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Mon, 3 Feb 2025 13:46:12 +0000
Subject: [PATCH 7/7] lisa.datautils: Remove df_refit_index(method=...) parameter

BREAKING CHANGE

Remove the "method" parameter and make the function effectively match
the behavior of passing method='pre'. The parameter was never needed in
practice and made implementing a streaming version harder for no
benefit.

Changing the behavior from "inclusive" to "pre" means the last row
added by df_refit_index() will not be reflecting the future value of
the signal but instead the actual current value of the signal at that
timestamp. This should have very little effect on real-world
computations, as the last row is usually not really used for anything
else than its timestamp.
---
 lisa/datautils.py | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/lisa/datautils.py b/lisa/datautils.py
index ecf9f2f3c..ee9180de9 100644
--- a/lisa/datautils.py
+++ b/lisa/datautils.py
@@ -460,7 +460,7 @@ class SeriesAccessor(DataAccessor):


 @SeriesAccessor.register_accessor
-def series_refit_index(series, start=None, end=None, window=None, method='inclusive', clip_window=True):
+def series_refit_index(series, start=None, end=None, window=None, clip_window=True):
     """
     Slice a series using :func:`series_window` and ensure we have a value at
     exactly the specified boundaries, unless the signal started after the
@@ -480,13 +480,6 @@ def series_refit_index(series, start=None, end=None, window=None, method='inclus
         exclusive.
     :type window: tuple(float or None, float or None) or None

-    :param method: Windowing method used to select the first and last values of
-        the series using :func:`series_window`. Defaults to ``inclusive``,
-        which is suitable for signals where all the value changes have a
-        corresponding row without any fixed sample-rate constraints. If they
-        have been downsampled, ``nearest`` might be a better choice.).
-    :type method: str
-
     .. note:: If ``end`` is past the end of the data, the last row will be
         duplicated so that we can have a start and end index at the right
        location, without moving the point at which the transition to the last
@@ -496,11 +489,11 @@ def series_refit_index(series, start=None, end=None, window=None, method='inclus
     :param clip_window: Passed down to :func:`series_refit_index`.
     """
     window = _make_window(start, end, window)
-    return _pandas_refit_index(series, window, method=method)
+    return _pandas_refit_index(series, window)


 @DataFrameAccessor.register_accessor
-def df_refit_index(df, start=None, end=None, window=None, method='inclusive'):
+def df_refit_index(df, start=None, end=None, window=None):
     """
     Same as :func:`series_refit_index` but acting on :class:`pandas.DataFrame`
     """
@@ -509,7 +502,7 @@ def df_refit_index(df, start=None, end=None, window=None, method='inclusive'):
     return _dispatch(
         _polars_refit_index,
         _pandas_refit_index,
-        df, window, method
+        df, window
     )


@@ -580,17 +573,19 @@ def df_split_signals(df, signal_cols, align_start=False, window=None):
             cols_val = {signal_cols[0]: group}

             if window:
-                signal = df_refit_index(signal, window=window, method='inclusive')
+                signal = df_refit_index(signal, window=window)

             yield (cols_val, signal)


-def _polars_refit_index(data, window, method):
+def _polars_refit_index(data, window):
     # TODO: maybe expose that as a param
     index = _polars_index_col(data, index='Time')
     start, end = _polars_duration_window(window)
-    data = _polars_window(data, window, method=method, col=index)
     index_col = pl.col(index)
+    # Ensure the data is sorted, which should be free if they already are.
+    data = data.sort(index_col)
+    data = _polars_window(data, window, method='pre', col=index)

     if start is not None:
         data = data.with_columns(
@@ -620,7 +615,7 @@ def _polars_refit_index(data, window, method):
     return data


-def _pandas_refit_index(data, window, method):
+def _pandas_refit_index(data, window):
     if data.empty:
         raise ValueError('Cannot refit the index of an empty dataframe or series')

@@ -629,7 +624,7 @@ def _pandas_refit_index(data, window, method):
         duplicate_last = False
     else:
         duplicate_last = end > data.index[-1]
-    data = _pandas_window(data, window, method=method)
+    data = _pandas_window(data, window, method='pre')

     if data.empty:
         return data
--
GitLab
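Note: a small, self-contained pandas illustration of the behaviour change described in the last commit message. It does not call LISA itself; the signal values are made up and the two variants are hand-rolled approximations of the old 'inclusive' and new 'pre' refit semantics, not the datautils implementation.

    import pandas as pd

    # A signal whose value changes at t=0, t=1 and t=5.
    sig = pd.Series([10, 20, 30], index=[0.0, 1.0, 5.0])
    end = 3.0

    # Old 'inclusive'-style refit: the row appended at the window end took the
    # value of the *next* change (the future value at t=5).
    inclusive = sig[sig.index <= end].copy()
    inclusive.loc[end] = sig[sig.index > end].iloc[0]  # 30

    # New 'pre'-style refit: the appended row carries the value that is
    # current at that timestamp.
    pre = sig[sig.index <= end].copy()
    pre.loc[end] = pre.iloc[-1]  # 20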