From 94afef422dd5e1b9f92aee3879138a98fb6421cb Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Fri, 31 Jan 2025 17:29:03 +0000
Subject: [PATCH 1/7] tools/lisa-plot: Ensure we never silently consume an exception

FIX

Ensure we always display something in case of exception, even if the
exception has no message.
---
 lisa/_cli_tools/lisa_plot.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lisa/_cli_tools/lisa_plot.py b/lisa/_cli_tools/lisa_plot.py
index 90ad3b8b9..132df2de1 100755
--- a/lisa/_cli_tools/lisa_plot.py
+++ b/lisa/_cli_tools/lisa_plot.py
@@ -147,7 +147,11 @@ def handle_plot_excep(exit_on_error=True):
         else:
             excep_msg = 'Please specify --plat-info with the "{}" filled in'.format(e.args[1])
     except Exception as e:
-        excep_msg = str(e)
+        msg = str(e)
+        if msg:
+            excep_msg = msg
+        else:
+            excep_msg = e.__class__.__qualname__
     else:
         excep_msg = None
--
GitLab

From 580ba6ddc0f8047abc454c3d1697bf442a7d174c Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Fri, 31 Jan 2025 16:17:51 +0000
Subject: [PATCH 2/7] lisa.analysis.status: Make StatusAnalysis.plot_overutilized() a lot faster

FEATURE

Use VSpans instead of VSpan to only create a single object to display,
leading to major speedups.
---
 lisa/analysis/status.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/lisa/analysis/status.py b/lisa/analysis/status.py
index 5a01ea845..f1f56ca0b 100644
--- a/lisa/analysis/status.py
+++ b/lisa/analysis/status.py
@@ -89,23 +89,17 @@ class StatusAnalysis(TraceAnalysisBase):
         df = self.df_overutilized()
         if not df.empty:
             df = df_refit_index(df, window=self.trace.window)
+            df = df[df['overutilized'] != 0]
+            df = df[['len']].reset_index()

             # Compute intervals in which the system is reported to be overutilized
-            return hv.Overlay(
-                [
-                    hv.VSpan(
-                        start,
-                        start + delta,
-                        label='Overutilized'
-                    ).options(
-                        color='red',
-                        alpha=0.05,
-                    )
-                    for start, delta, overutilized in df[['len', 'overutilized']].itertuples()
-                    if overutilized
-                ]
+            return hv.VSpans(
+                (df['Time'], df['Time'] + df['len']),
+                label='Overutilized'
             ).options(
-                title='System-wide overutilized status'
+                color='red',
+                alpha=0.05,
+                title='System-wide overutilized status',
             )
         else:
             return _hv_neutral()
--
GitLab
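Note: to see why the change above (and the identical one in the next patch) helps, the sketch below builds the same overutilization bands both ways, one hv.VSpan per interval versus a single hv.VSpans. It is only an illustration: the interval values are made up, and it assumes a HoloViews release recent enough to provide hv.VSpans, with the bokeh backend installed.

    import holoviews as hv
    import pandas as pd

    hv.extension('bokeh')

    # Made-up overutilization intervals: start times and durations in seconds.
    starts = pd.Series([0.0, 2.5, 6.0])
    lens = pd.Series([0.5, 1.0, 0.25])

    # Old approach: one hv.VSpan element per interval, composed into an
    # hv.Overlay, so every interval becomes a separate object to render.
    per_interval = hv.Overlay([
        hv.VSpan(start, start + length, label='Overutilized')
        for start, length in zip(starts, lens)
    ])

    # New approach: a single hv.VSpans element carrying all the intervals,
    # built from (starts, ends) columns like the patched code does.
    batched = hv.VSpans((starts, starts + lens), label='Overutilized').options(
        color='red',
        alpha=0.05,
    )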
From 79144a98f4109008aa055714da5d85cbc23d235d Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Fri, 31 Jan 2025 16:29:25 +0000
Subject: [PATCH 3/7] lisa.analysis.latency: Make LatencyAnalysis.plot_latency_bands() a lot faster

FEATURE

Use VSpans instead of VSpan to only create a single object to display,
leading to major speedups.
---
 lisa/analysis/latency.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py
index f4a96649e..69a32f800 100644
--- a/lisa/analysis/latency.py
+++ b/lisa/analysis/latency.py
@@ -406,20 +406,14 @@ class LatencyAnalysis(TraceAnalysisBase):
             df = df_refit_index(df, window=self.trace.window)
             if df.empty:
                 return _hv_neutral()
-
-            return hv.Overlay(
-                [
-                    hv.VSpan(
-                        start,
-                        start + duration,
-                        label=label,
-                    ).options(
-                        alpha=0.5,
-                    )
-                    for start, duration in df[[column]].itertuples()
-                ]
-            )
-
+            else:
+                df = df[[column]].reset_index()
+                return hv.VSpans(
+                    (df['Time'], df['Time'] + df[column]),
+                    label=label,
+                ).options(
+                    alpha=0.5,
+                )

         return (
             plot_bands(wkl_df, "wakeup_latency", "Wakeup latencies") *
             plot_bands(prt_df, "preempt_latency", "Preemption latencies")
--
GitLab

From 100914c9bbbb8e374bad5c8939cb4da077cca44d Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Fri, 31 Jan 2025 16:46:42 +0000
Subject: [PATCH 4/7] lisa.analysis.frequency: Add plot_cpu_frequencies(overutilized=True) parameter

FEATURE

Allow controlling whether plot_cpu_frequencies() displays the
overutilized state or not.
---
 lisa/analysis/frequency.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py
index 8a056baf7..eaa3e5cd6 100644
--- a/lisa/analysis/frequency.py
+++ b/lisa/analysis/frequency.py
@@ -391,7 +391,7 @@ class FrequencyAnalysis(TraceAnalysisBase):

     @TraceAnalysisBase.plot_method
     @df_cpu_frequency.used_events
-    def plot_cpu_frequencies(self, cpu: CPU, average: bool=True):
+    def plot_cpu_frequencies(self, cpu: CPU, average: bool=True, overutilized: bool=True):
         """
         Plot frequency for the specified CPU

@@ -402,6 +402,9 @@ class FrequencyAnalysis(TraceAnalysisBase):
             frequency average.
         :type average: bool

+        :param overutilized: If ``True``, add the overutilized state as an overlay.
+        :type overutilized: bool
+
         If ``sched_overutilized`` events are available, the plots will also
         show the intervals of time where the system was overutilized.
         """
@@ -425,9 +428,10 @@ class FrequencyAnalysis(TraceAnalysisBase):
         if average and avg > 0:
             fig *= hv.HLine(avg, group='average').opts(color='red')

-        plot_overutilized = self.ana.status.plot_overutilized
-        if self.trace.has_events(plot_overutilized.used_events):
-            fig *= plot_overutilized()
+        if overutilized:
+            plot_overutilized = self.ana.status.plot_overutilized
+            if self.trace.has_events(plot_overutilized.used_events):
+                fig *= plot_overutilized()

         return fig
--
GitLab
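Note: a minimal usage sketch for the new overutilized parameter added above; `trace` is assumed to be an existing lisa.trace.Trace with the relevant events parsed.

    # Default behaviour is unchanged: overutilized bands are overlaid whenever
    # sched_overutilized events are present in the trace.
    fig = trace.ana.frequency.plot_cpu_frequencies(cpu=0)

    # Opt out of the overlay, e.g. to keep a busy plot readable.
    fig = trace.ana.frequency.plot_cpu_frequencies(cpu=0, overutilized=False)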
""" logger = self.logger - df = self.df_cpu_frequency(cpu) + df = self.df_cpu_frequency(cpu, df_fmt='polars-lazyframe') if "freqs" in self.trace.plat_info: frequencies = self.trace.plat_info['freqs'][cpu] else: logger.info(f"Estimating CPU{cpu} frequencies from trace") - frequencies = sorted(list(df.frequency.unique())) + frequencies = sorted( + df.select(pl.col('frequency').unique()).collect()['frequency'].to_list() + ) logger.debug(f"Estimated frequencies: {frequencies}") avg = self.get_average_cpu_frequency(cpu) @@ -423,7 +425,10 @@ class FrequencyAnalysis(TraceAnalysisBase): "Average frequency for CPU{} : {:.3f} GHz".format(cpu, avg / 1e6)) df = df_refit_index(df, window=self.trace.window) - fig = plot_signal(df['frequency'], name=f'Frequency of CPU{cpu} (Hz)') + fig = plot_signal( + df.select(('Time', 'frequency')), + name=f'Frequency of CPU{cpu} (Hz)', + ) if average and avg > 0: fig *= hv.HLine(avg, group='average').opts(color='red') -- GitLab From 32756153473767954712dbbcabe6c2b15710a1d6 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Fri, 31 Jan 2025 17:25:17 +0000 Subject: [PATCH 6/7] lisa.analysis.status: Convert StatusAnalysis to polars FEATURE Convert the whole analysis to polars to avoid conversion overheads. --- lisa/analysis/status.py | 76 ++++++++++++++++++++++++++--------------- tests/test_trace.py | 8 +++-- 2 files changed, 54 insertions(+), 30 deletions(-) diff --git a/lisa/analysis/status.py b/lisa/analysis/status.py index f1f56ca0b..d1679869d 100644 --- a/lisa/analysis/status.py +++ b/lisa/analysis/status.py @@ -20,10 +20,11 @@ """ System Status Analaysis Module """ import holoviews as hv +import polars as pl from lisa.analysis.base import TraceAnalysisBase from lisa.trace import requires_events -from lisa.datautils import df_refit_index, df_add_delta, df_deduplicate +from lisa.datautils import df_refit_index, df_add_delta, df_deduplicate, _df_to from lisa.notebook import _hv_neutral @@ -52,29 +53,48 @@ class StatusAnalysis(TraceAnalysisBase): * A ``overutilized`` column (the overutilized status at a given time) * A ``len`` column (the time spent in that overutilized status) """ + trace = self.trace.get_view(df_fmt='polars-lazyframe') # Build sequence of overutilization "bands" - df = self.trace.df_event('sched_overutilized') + df = trace.df_event('sched_overutilized') + # Deduplicate before calling df_refit_index() since it will likely add + # a row with duplicated state to have the expected window end + # timestamp. + df = df.filter( + pl.col('overutilized') != + pl.col('overutilized').shift( + 1, + # We want to select the first row, so make sure the filter + # evaluates to true at that index. + fill_value=pl.col('overutilized').not_(), + ) + ) + df = df_refit_index(df, window=trace.window) + # There might be a race between multiple CPUs to emit the # sched_overutilized event, so get rid of duplicated events - df = df_deduplicate(df, cols=['overutilized'], keep='first', consecutives=True) - df = df_add_delta(df, col='len', window=self.trace.window) - # Ignore the last line added by df_refit_index() with a NaN len - df = df.iloc[:-1] - return df[['len', 'overutilized']] + df = df.with_columns( + overutilized=pl.col('overutilized').cast(pl.Boolean), + len=pl.col('Time').diff().shift(-1), + ) + return df.select(('Time', 'overutilized', 'len')) def get_overutilized_time(self): """ Return the time spent in overutilized state. 
""" - df = self.df_overutilized() - return df[df['overutilized'] == 1]['len'].sum() + df = self.df_overutilized(df_fmt='polars-lazyframe') + df = df.filter(pl.col('overutilized')) + duration = df.select( + pl.col('len').dt.total_nanoseconds().sum() / 1e9 + ).collect().item() + return float(duration) def get_overutilized_pct(self): """ The percentage of the time spent in overutilized state. """ ou_time = self.get_overutilized_time() - return 100 * ou_time / self.trace.time_range + return float(100 * ou_time / self.trace.time_range) ############################################################################### # Plotting Methods @@ -86,22 +106,24 @@ class StatusAnalysis(TraceAnalysisBase): """ Draw the system's overutilized status as colored bands """ - df = self.df_overutilized() - if not df.empty: - df = df_refit_index(df, window=self.trace.window) - df = df[df['overutilized'] != 0] - df = df[['len']].reset_index() - - # Compute intervals in which the system is reported to be overutilized - return hv.VSpans( - (df['Time'], df['Time'] + df['len']), - label='Overutilized' - ).options( - color='red', - alpha=0.05, - title='System-wide overutilized status', - ) - else: - return _hv_neutral() + df = self.df_overutilized(df_fmt='polars-lazyframe') + + df = df.filter(pl.col('overutilized')) + df = df.select( + pl.col('Time'), + (pl.col('Time') + pl.col('len')).alias('width'), + ) + df = _df_to(df, fmt='pandas') + df.reset_index(inplace=True) + + # Compute intervals in which the system is reported to be overutilized + return hv.VSpans( + (df['Time'], df['width']), + label='Overutilized' + ).options( + color='red', + alpha=0.05, + title='System-wide overutilized status', + ) # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/tests/test_trace.py b/tests/test_trace.py index e6ed30ee0..9b1c96aa4 100644 --- a/tests/test_trace.py +++ b/tests/test_trace.py @@ -406,16 +406,18 @@ class TestTraceView(TraceTestCase): def test_lower_slice(self): view = self.trace[81:] - assert len(view.ana.status.df_overutilized()) == 2 + df = view.ana.status.df_overutilized() + assert len(df) == 3 def test_upper_slice(self): view = self.trace[:80.402065] df = view.ana.status.df_overutilized() - assert len(view.ana.status.df_overutilized()) == 1 + assert len(df) == 2 def test_full_slice(self): view = self.trace[80:81] - assert len(view.ana.status.df_overutilized()) == 2 + df = view.ana.status.df_overutilized() + assert len(df) == 3 def test_time_range(self): expected_duration = np.nextafter(4.0, math.inf) -- GitLab From 5ce348f3aeae957ed6afe629489bd0782034aefe Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Mon, 3 Feb 2025 13:46:12 +0000 Subject: [PATCH 7/7] lisa.datautils: Remove df_refit_index(method=...) parameter BREAKING CHANGE Remove the "method" parameter and make the function effectively match the behavior of passing method='pre'. The parameter was never needed in practice and made implementing a streaming version harder for no benefit. Changing the behavior from "inclusive" to "pre" means the last row added by df_refit_index() will not be reflecting the future value of the signal but instead the actual current value of the signal at that timestamp. This should have very little effect on real-world computations, as the last row is usually not really used for anything else than its timestamp. 
From 5ce348f3aeae957ed6afe629489bd0782034aefe Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Mon, 3 Feb 2025 13:46:12 +0000
Subject: [PATCH 7/7] lisa.datautils: Remove df_refit_index(method=...) parameter

BREAKING CHANGE

Remove the "method" parameter and make the function effectively match
the behavior of passing method='pre'. The parameter was never needed in
practice and made implementing a streaming version harder for no
benefit.

Changing the behavior from "inclusive" to "pre" means the last row
added by df_refit_index() will not be reflecting the future value of
the signal but instead the actual current value of the signal at that
timestamp. This should have very little effect on real-world
computations, as the last row is usually not really used for anything
else than its timestamp.
---
 lisa/datautils.py | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/lisa/datautils.py b/lisa/datautils.py
index ecf9f2f3c..ee9180de9 100644
--- a/lisa/datautils.py
+++ b/lisa/datautils.py
@@ -460,7 +460,7 @@ class SeriesAccessor(DataAccessor):


 @SeriesAccessor.register_accessor
-def series_refit_index(series, start=None, end=None, window=None, method='inclusive', clip_window=True):
+def series_refit_index(series, start=None, end=None, window=None, clip_window=True):
     """
     Slice a series using :func:`series_window` and ensure we have a value at
     exactly the specified boundaries, unless the signal started after the
@@ -480,13 +480,6 @@ def series_refit_index(series, start=None, end=None, window=None, method='inclus
         exclusive.
     :type window: tuple(float or None, float or None) or None

-    :param method: Windowing method used to select the first and last values of
-        the series using :func:`series_window`. Defaults to ``inclusive``,
-        which is suitable for signals where all the value changes have a
-        corresponding row without any fixed sample-rate constraints. If they
-        have been downsampled, ``nearest`` might be a better choice.).
-    :type method: str
-
     .. note:: If ``end`` is past the end of the data, the last row will be
         duplicated so that we can have a start and end index at the right
        location, without moving the point at which the transition to the last
@@ -496,11 +489,11 @@ def series_refit_index(series, start=None, end=None, window=None, method='inclus
     :param clip_window: Passed down to :func:`series_refit_index`.
     """
     window = _make_window(start, end, window)
-    return _pandas_refit_index(series, window, method=method)
+    return _pandas_refit_index(series, window)


 @DataFrameAccessor.register_accessor
-def df_refit_index(df, start=None, end=None, window=None, method='inclusive'):
+def df_refit_index(df, start=None, end=None, window=None):
     """
     Same as :func:`series_refit_index` but acting on :class:`pandas.DataFrame`
     """
@@ -509,7 +502,7 @@ def df_refit_index(df, start=None, end=None, window=None, method='inclusive'):
     return _dispatch(
         _polars_refit_index,
         _pandas_refit_index,
-        df, window, method
+        df, window
     )


@@ -580,17 +573,19 @@ def df_split_signals(df, signal_cols, align_start=False, window=None):
             cols_val = {signal_cols[0]: group}

             if window:
-                signal = df_refit_index(signal, window=window, method='inclusive')
+                signal = df_refit_index(signal, window=window)

             yield (cols_val, signal)


-def _polars_refit_index(data, window, method):
+def _polars_refit_index(data, window):
     # TODO: maybe expose that as a param
     index = _polars_index_col(data, index='Time')
     start, end = _polars_duration_window(window)
-    data = _polars_window(data, window, method=method, col=index)
     index_col = pl.col(index)
+    # Ensure the data is sorted, which should be free if they already are.
+    data = data.sort(index_col)
+    data = _polars_window(data, window, method='pre', col=index)

     if start is not None:
         data = data.with_columns(
@@ -620,7 +615,7 @@ def _polars_refit_index(data, window, method):
     return data


-def _pandas_refit_index(data, window, method):
+def _pandas_refit_index(data, window):
     if data.empty:
         raise ValueError('Cannot refit the index of an empty dataframe or series')

@@ -629,7 +624,7 @@ def _pandas_refit_index(data, window, method):
         duplicate_last = False
     else:
         duplicate_last = end > data.index[-1]
-    data = _pandas_window(data, window, method=method)
+    data = _pandas_window(data, window, method='pre')

     if data.empty:
         return data
--
GitLab
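Note: a small, self-contained pandas illustration of the behaviour change described in the last commit message. It does not call LISA itself; the signal values are made up and the two variants are hand-rolled approximations of the old 'inclusive' and new 'pre' refit semantics, not the datautils implementation.

    import pandas as pd

    # A signal whose value changes at t=0, t=1 and t=5.
    sig = pd.Series([10, 20, 30], index=[0.0, 1.0, 5.0])
    end = 3.0

    # Old 'inclusive'-style refit: the row appended at the window end took the
    # value of the *next* change (the future value at t=5).
    inclusive = sig[sig.index <= end].copy()
    inclusive.loc[end] = sig[sig.index > end].iloc[0]  # 30

    # New 'pre'-style refit: the appended row carries the value that is
    # current at that timestamp.
    pre = sig[sig.index <= end].copy()
    pre.loc[end] = pre.iloc[-1]  # 20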