From db8e204f26ac9b46a96eb3dea085772eac9f6e4b Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 13 Nov 2018 13:49:20 +0000 Subject: [PATCH 01/56] platforms: PlatformInfo: Always collect cpus-count --- lisa/platforms/platinfo.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lisa/platforms/platinfo.py b/lisa/platforms/platinfo.py index aaa8ff7f3..dc9a0d8a4 100644 --- a/lisa/platforms/platinfo.py +++ b/lisa/platforms/platinfo.py @@ -82,7 +82,8 @@ class PlatformInfo(MultiSrcConf, HideExekallID): 'rtapp': { # Since it is expensive to compute, use an on-demand DeferredValue 'calib': DeferredValue(RTA.get_cpu_calibrations, te) - } + }, + 'cpus-count': te.target.number_of_cpus } if 'sched' in target.modules: @@ -122,7 +123,6 @@ class PlatformInfo(MultiSrcConf, HideExekallID): } topology = Topology(clusters=cpu_groups) - cpus_count = sum(len(group) for group in cpu_groups) def freq_list(group): return sorted(set( @@ -139,7 +139,6 @@ class PlatformInfo(MultiSrcConf, HideExekallID): info = { 'clusters': clusters, 'topology': topology, - 'cpus-count': cpus_count, 'freqs': freqs, } -- GitLab From ff6586268fa6b175cacfedb247002b892aa6b6b6 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 13 Nov 2018 15:02:50 +0000 Subject: [PATCH 02/56] platforms: PlatformInfo: Always collect frequency information --- lisa/platforms/platinfo.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lisa/platforms/platinfo.py b/lisa/platforms/platinfo.py index dc9a0d8a4..930e6fa66 100644 --- a/lisa/platforms/platinfo.py +++ b/lisa/platforms/platinfo.py @@ -65,7 +65,8 @@ class PlatformInfo(MultiSrcConf, HideExekallID): KeyDesc('topology', 'Compat key: CPU topology', [Topology]), KeyDesc('clusters', 'Compat key: dictionary of cluster names to list of CPU ID', [StrIntListDict]), KeyDesc('cpus-count', 'Compat key: number of CPUs', [int]), - KeyDesc('freqs', 'Compat key: dictionary of cluster names to list of frequencies', [StrIntListDict]), + KeyDesc('freq-domains', 'Frequency domains', [list]), + KeyDesc('freqs', 'Dictionnary of first cluster CPU to list of frequencies', [dict]), )) """Some keys have a reserved meaning with an associated type.""" @@ -86,6 +87,11 @@ class PlatformInfo(MultiSrcConf, HideExekallID): 'cpus-count': te.target.number_of_cpus } + if hasattr(target, 'cpufreq'): + info['freq-domains'] = list(target.cpufreq.iter_domains()) + info['freqs'] = {cpus[0] : target.cpufreq.list_frequencies(cpus[0]) + for cpus in target.cpufreq.iter_domains()} + if 'sched' in target.modules: info['cpu-capacities'] = target.sched.get_capacities(default=1024) @@ -139,7 +145,7 @@ class PlatformInfo(MultiSrcConf, HideExekallID): info = { 'clusters': clusters, 'topology': topology, - 'freqs': freqs, + # 'freqs': freqs, } return self.add_src(src, info, filter_none=True, **kwargs) -- GitLab From a45b8c0bf9ccd241c866ad7b7122d46554796685 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 15 Nov 2018 11:54:33 +0000 Subject: [PATCH 03/56] platforms/platinfo: Save frequencies for ALL CPUs Even if some CPUs share frequency domains, it's useful to record their reported available frequency - we might not always be able to figure out frequency domains, and it also saves us from having to look at frequency domains just to get the frequency of a CPU that is not the first one in the domain. 
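
For illustration, the collection scheme boils down to the following sketch
(illustrative only, assuming a connected devlib ``target`` with the cpufreq
module loaded, as used elsewhere in this series):

    # One 'freqs' entry per CPU, even for CPUs sharing a frequency domain
    freqs = {cpu: target.cpufreq.list_frequencies(cpu)
             for cpu in range(target.number_of_cpus)}

    # Frequency domains are still recorded separately, when available
    freq_domains = list(target.cpufreq.iter_domains())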
--- lisa/platforms/platinfo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lisa/platforms/platinfo.py b/lisa/platforms/platinfo.py index 930e6fa66..aaa226c4f 100644 --- a/lisa/platforms/platinfo.py +++ b/lisa/platforms/platinfo.py @@ -66,7 +66,7 @@ class PlatformInfo(MultiSrcConf, HideExekallID): KeyDesc('clusters', 'Compat key: dictionary of cluster names to list of CPU ID', [StrIntListDict]), KeyDesc('cpus-count', 'Compat key: number of CPUs', [int]), KeyDesc('freq-domains', 'Frequency domains', [list]), - KeyDesc('freqs', 'Dictionnary of first cluster CPU to list of frequencies', [dict]), + KeyDesc('freqs', 'Dictionnary of CPU to list of frequencies', [dict]), )) """Some keys have a reserved meaning with an associated type.""" @@ -89,10 +89,10 @@ class PlatformInfo(MultiSrcConf, HideExekallID): if hasattr(target, 'cpufreq'): info['freq-domains'] = list(target.cpufreq.iter_domains()) - info['freqs'] = {cpus[0] : target.cpufreq.list_frequencies(cpus[0]) - for cpus in target.cpufreq.iter_domains()} + info['freqs'] = {cpu : target.cpufreq.list_frequencies(cpu) + for cpu in range(target.number_of_cpus)} - if 'sched' in target.modules: + if hasattr(target, 'sched'): info['cpu-capacities'] = target.sched.get_capacities(default=1024) return self.add_src(src, info, filter_none=True, **kwargs) -- GitLab From 793ce4c3d4182aa6e2ceec36ccbb1c0c377cc5c3 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 13 Nov 2018 15:03:37 +0000 Subject: [PATCH 04/56] analysis/base: Remove hardcoded big.LITTLE dependencies --- lisa/analysis/base.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index fe222ff31..2ac9f8818 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -44,22 +44,6 @@ class AnalysisBase: plat_info = self._trace.plat_info - # By default assume SMP system - self._big_cap = 1024 - self._little_cap = 1024 - self._big_cpus = list(range(trace.cpus_count)) - self._little_cpus = [] - - if self._trace.has_big_little: - nrg_model = plat_info['nrg-model'] - self._little_cap = nrg_model.get_cpu_capacity(nrg_model.littlest_cpus[0]) - - if ('clusters' in plat_info and - 'big' in plat_info['clusters'] and - 'little' in plat_info['clusters']): - self._big_cpus = plat_info['clusters']['big'] - self._little_cpus = plat_info['clusters']['little'] - def _plot_setup(self, width=16, height=4, ncols=1, nrows=1): figure, axes = plt.subplots( ncols=ncols, nrows=nrows, figsize=(width, height * nrows) -- GitLab From 6d221fa7ce8ac488f6dac5f2b1cf113682292d0a Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 16 Nov 2018 20:06:18 +0000 Subject: [PATCH 05/56] trace: Remove has_big_little --- lisa/trace.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lisa/trace.py b/lisa/trace.py index 7786d818c..c47ba8e4e 100644 --- a/lisa/trace.py +++ b/lisa/trace.py @@ -465,13 +465,6 @@ class Trace(Loggable): ############################################################################### # Trace Events Sanitize Methods ############################################################################### - @property - def has_big_little(self): - return ('clusters' in self.plat_info - and 'big' in self.plat_info['clusters'] - and 'little' in self.plat_info['clusters'] - and 'nrg-model' in self.plat_info) - def _sanitize_SchedCpuCapacity(self): """ Add more columns to cpu_capacity data frame if the energy model is -- GitLab From a2f4d349472787c244c3dc3a970c3e1bd53e0c10 Mon Sep 17 00:00:00 2001 From: Valentin 
Schneider Date: Thu, 15 Nov 2018 13:15:37 +0000 Subject: [PATCH 06/56] platforms/platinfo: Kill nrg_model derived keys --- lisa/env.py | 4 --- lisa/platforms/platinfo.py | 57 -------------------------------------- 2 files changed, 61 deletions(-) diff --git a/lisa/env.py b/lisa/env.py index 4d8c1d25f..27b0d3dc6 100644 --- a/lisa/env.py +++ b/lisa/env.py @@ -268,10 +268,6 @@ class TestEnv(Loggable, HideExekallID): # computed when actually needed. self.plat_info.add_target_src(self, fallback=True) - # Update the PlatformInfo with keys derived from the energy model - with contextlib.suppress(KeyError): - self.plat_info.add_nrg_model_src() - logger.info('Effective platform information:\n%s', self.plat_info) @classmethod diff --git a/lisa/platforms/platinfo.py b/lisa/platforms/platinfo.py index aaa226c4f..41a66322e 100644 --- a/lisa/platforms/platinfo.py +++ b/lisa/platforms/platinfo.py @@ -60,10 +60,6 @@ class PlatformInfo(MultiSrcConf, HideExekallID): KeyDesc('abi', 'ABI, e.g. "arm64"', [str]), KeyDesc('os', 'OS being used, e.g. "linux"', [str]), KeyDesc('name', 'Free-form name of the board', [str]), - - # TODO remove that once no code depend on it anymore - KeyDesc('topology', 'Compat key: CPU topology', [Topology]), - KeyDesc('clusters', 'Compat key: dictionary of cluster names to list of CPU ID', [StrIntListDict]), KeyDesc('cpus-count', 'Compat key: number of CPUs', [int]), KeyDesc('freq-domains', 'Frequency domains', [list]), KeyDesc('freqs', 'Dictionnary of CPU to list of frequencies', [dict]), @@ -97,59 +93,6 @@ class PlatformInfo(MultiSrcConf, HideExekallID): return self.add_src(src, info, filter_none=True, **kwargs) - #TODO: kill that once code depending on this has been converted to - # using the appropriate "root" data, instead of these derived values. 
- def add_nrg_model_src(self, nrg_model=None, src='nrg-model', **kwargs): - # Derive all the deprecated keys from the nrg_model - nrg_model = nrg_model or self['nrg-model'] - node_groups = nrg_model.node_groups - - # Sort according to max capacity found in the group - def max_capacity(group): - return max( - s.capacity - for node in group - for s in node.active_states.values() - ) - node_groups = sorted(node_groups, key=max_capacity) - cpu_groups = [ - [node.cpu for node in group] - for group in node_groups - ] - - # big.LITTLE platform - if len(cpu_groups) == 2: - cluster_names = ['little', 'big'] - # SMP platform - else: - cluster_names = [str(i) for i in range(len(cpu_groups))] - clusters = { - name: group - for name, group in zip(cluster_names, cpu_groups) - } - - topology = Topology(clusters=cpu_groups) - - def freq_list(group): - return sorted(set( - freq - for node in group - for freq in node.active_states.keys() - )) - - freqs = { - cluster_name: freq_list(group) - for cluster_name, group in zip(cluster_names, node_groups) - } - - info = { - 'clusters': clusters, - 'topology': topology, - # 'freqs': freqs, - } - - return self.add_src(src, info, filter_none=True, **kwargs) - # Internal methods used to compute some keys from a live devlib Target @classmethod -- GitLab From 4badfa54c68adc632a3f589ac9624fc70d1a7c13 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 13 Nov 2018 15:34:13 +0000 Subject: [PATCH 07/56] analysis/base: Rename _plot_setup() into setup_plot() --- lisa/analysis/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index 2ac9f8818..51a541519 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -44,9 +44,9 @@ class AnalysisBase: plat_info = self._trace.plat_info - def _plot_setup(self, width=16, height=4, ncols=1, nrows=1): + def setup_plot(self, width=16, height=4, ncols=1, nrows=1, **kwargs): figure, axes = plt.subplots( - ncols=ncols, nrows=nrows, figsize=(width, height * nrows) + ncols=ncols, nrows=nrows, figsize=(width, height * nrows), **kwargs ) # Needed for multirow plots to not overlap with each other plt.tight_layout(h_pad=3.5) @@ -134,7 +134,7 @@ class AnalysisBase: setup_plot = False if ax is None: - _, ax = self._plot_setup(width, height) + _, ax = self.setup_plot(width, height) setup_plot = True matches = dfr[pivot].unique().tolist() -- GitLab From 0e7615665b5e7201784ccae03273031cc88ed5f9 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 13 Nov 2018 15:34:35 +0000 Subject: [PATCH 08/56] analysis/base: Add a check_events() common helper --- lisa/analysis/base.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index 51a541519..8ab9a6ae7 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -36,6 +36,14 @@ class AnalysisBase: :param trace: input Trace object :type trace: :class:`trace.Trace` + + :Design notes: + + Method depending on certain trace events *must* start with a call to + :meth:`AnalysisBase.check_events`. + + Plotting methods *must* return the :class:`matplotlib.axes.Axes` instance + used by the plotting method. This lets users embed plots into subplots. 
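+
+    For example, a plotting method following these conventions could be
+    sketched as below (``plot_foo`` is purely illustrative)::
+
+        def plot_foo(self):
+            self.check_events(['sched_switch'])
+            fig, axis = self.setup_plot()
+            df = self._trace.df_events('sched_switch')
+            df['__cpu'].plot(ax=axis, drawstyle='steps-post')
+            return axis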
""" def __init__(self, trace): @@ -52,6 +60,17 @@ class AnalysisBase: plt.tight_layout(h_pad=3.5) return figure, axes + def check_events(self, required_events): + """ + :raises: RuntimeError if some events are not available + """ + available_events = set(self._trace.events) + missing_events = set(required_events).difference(available_events) + + if missing_events: + raise RuntimeError( + "Trace is missing the following required events: {}".format(missing_events)) + def _plot_generic(self, dfr, pivot, filters=None, columns=None, prettify_name=None, width=16, height=4, drawstyle="default", ax=None, title=""): -- GitLab From b99c67e474fde5f4b4c7e2edad9b7763aaa15687 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 13 Nov 2018 18:00:36 +0000 Subject: [PATCH 09/56] analysis/base: Add some docstrings --- lisa/analysis/base.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index 8ab9a6ae7..f468a6440 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -52,7 +52,28 @@ class AnalysisBase: plat_info = self._trace.plat_info - def setup_plot(self, width=16, height=4, ncols=1, nrows=1, **kwargs): + @classmethod + def setup_plot(cls, width=16, height=4, ncols=1, nrows=1, **kwargs): + """ + Common helper for setting up a matplotlib plot + + :param width: Width of the plot (inches) + :type width: int or float + + :param height: Height of each subplot (inches) + :type height: int or float + + :param ncols: Number of plots on a single row + :type ncols: int + + :param nrows: Number of plots in a single column + :type nrows: int + + :Keywords arguments: Extra arguments to pass to :meth:`matplotlib.subplots` + + :returns: tuple(matplotlib.figure.Figure, matplotlib.axes.Axes (or an + (array of, if ``nrows`` > 1)) + """ figure, axes = plt.subplots( ncols=ncols, nrows=nrows, figsize=(width, height * nrows), **kwargs ) @@ -62,6 +83,8 @@ class AnalysisBase: def check_events(self, required_events): """ + Check that certain trace events are available in the trace + :raises: RuntimeError if some events are not available """ available_events = set(self._trace.events) -- GitLab From 55cbddb6810edf6bc4f379bbaeda0b26b7d699b1 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 13 Nov 2018 18:02:00 +0000 Subject: [PATCH 10/56] analysis/base: Add a save_plot() helper method --- lisa/analysis/base.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index f468a6440..2bdcb407d 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -17,6 +17,8 @@ import logging from collections import namedtuple +import os +import inspect import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt @@ -81,6 +83,30 @@ class AnalysisBase: plt.tight_layout(h_pad=3.5) return figure, axes + def save_plot(self, figure, filepath=None, img_format="png"): + """ + Save the plot stored in the ``figure`` + + :param figure: The plot figure + :type figure: matplotlib.figure.figure + + :param filepath: The path of the file into which the plot will be saved. + If ``None``, a path based on the trace directory and the calling method + will be used. 
+ :type filepath: str + + :param img_format: The image format to generate + :type img_format: str + """ + if filepath is None: + module = self.__module__ + caller = inspect.stack()[1][3] + filepath = os.path.join( + self._trace.plots_dir, + "{}.{}.{}".format(module, caller, img_format)) + + figure.savefig(filepath, format=img_format) + def check_events(self, required_events): """ Check that certain trace events are available in the trace -- GitLab From c6f8fdb2c684ea05afbb70e9867a98a9d15744c2 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 13 Nov 2018 18:02:23 +0000 Subject: [PATCH 11/56] analysis: Cleanup cpu analysis Use the newly-introduced helper methods, add a load_tracking analysis module to move plotting methods out of the cpus module. --- lisa/analysis/cpus.py | 159 ++++++++++----------------------- lisa/analysis/load_tracking.py | 119 ++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 114 deletions(-) create mode 100644 lisa/analysis/load_tracking.py diff --git a/lisa/analysis/cpus.py b/lisa/analysis/cpus.py index 1228cc23c..80253615b 100644 --- a/lisa/analysis/cpus.py +++ b/lisa/analysis/cpus.py @@ -21,6 +21,8 @@ import matplotlib.pyplot as plt import pylab as pl import pandas as pd +from trappy.utils import handle_duplicate_index + from lisa.analysis.base import AnalysisBase @@ -48,19 +50,17 @@ class CpusAnalysis(AnalysisBase): :returns: :mod:`pandas.DataFrame` """ - if not self._trace.hasEvents('sched_switch'): - self._log.warning('Events [sched_switch] not found, context switch ' - 'computation not possible!') - return None + self.check_events(['sched_switch']) sched_df = self._trace.df_events('sched_switch') - cpus = list(range(self._trace.plat_info['cpus-count'])) + cpus = list(range(self._trace.cpus_count)) ctx_sw_df = pd.DataFrame( [len(sched_df[sched_df['__cpu'] == cpu]) for cpu in cpus], index=cpus, columns=['context_switch_cnt'] ) ctx_sw_df.index.name = 'cpu' + return ctx_sw_df def df_cpu_wakeups(self, cpus=None): @@ -74,10 +74,7 @@ class CpusAnalysis(AnalysisBase): row shows a time when the given ``cpu`` was woken up from idle. """ - if not self._trace.hasEvents('cpu_idle'): - self._log.warning('Events [cpu_idle] not found, cannot ' - 'get CPU wakeup events.') - return None + self.check_events(['cpu_idle']) cpus = cpus or list(range(self._trace.cpus_count)) @@ -90,127 +87,61 @@ class CpusAnalysis(AnalysisBase): return pd.DataFrame({'cpu': sr}).sort_index() -############################################################################### -# Plotting Methods -############################################################################### - - def plot_cpu(self, cpus=None): + def signal_cpu_active(self, cpu): """ - Plot CPU-related signals for both big and LITTLE clusters. + Build a square wave representing the active (i.e. 
non-idle) CPU time, + i.e.: - :param cpus: list of CPUs to be plotted - :type cpus: list(int) - """ - if not self._trace.hasEvents('sched_load_avg_cpu'): - self._log.warning('Events [sched_load_avg_cpu] not found, ' - 'plot DISABLED!') - return + cpu_active[t] == 1 if the CPU is reported to be non-idle by cpuidle at + time t + cpu_active[t] == 0 otherwise - # Filter on specified cpus - if cpus is None: - cpus = sorted(self._big_cpus + self._little_cpus) + :param cpu: CPU ID + :type cpu: int - # Plot: big CPUs - bcpus = set(cpus).intersection(self._big_cpus) - if bcpus: - self._plot_cpu(bcpus, "big") + :returns: A :class:`pandas.Series` or ``None`` if the trace contains no + "cpu_idle" events + """ + self.check_events(['cpu_idle']) - # Plot: LITTLE CPUs - lcpus = set(cpus).intersection(self._little_cpus) - if lcpus: - self._plot_cpu(lcpus, "LITTLE") + idle_df = self._trace.df_events('cpu_idle') + cpu_df = idle_df[idle_df.cpu_id == cpu] + cpu_active = cpu_df.state.apply( + lambda s: 1 if s == -1 else 0 + ) -############################################################################### -# Utility Methods -############################################################################### + start_time = 0.0 + if not self._trace.ftrace.normalized_time: + start_time = self._trace.ftrace.basetime - def _plot_cpu(self, cpus, label=''): - """ - Internal method that generates plots for all input CPUs. + if cpu_active.empty: + cpu_active = pd.Series([0], index=[start_time]) + elif cpu_active.index[0] != start_time: + entry_0 = pd.Series(cpu_active.iloc[0] ^ 1, index=[start_time]) + cpu_active = pd.concat([entry_0, cpu_active]) - :param cpus: list of CPUs to be plotted - :type cpus: list(int) - """ - if label != '': - label1 = '{} '.format(label) - label2 = '_{}s'.format(label.lower()) + # Fix sequences of wakeup/sleep events reported with the same index + return handle_duplicate_index(cpu_active) - # Plot required CPUs - _, pltaxes = plt.subplots(len(cpus), 1, figsize=(16, 3*(len(cpus)))) - idx = 0 - for cpu in cpus: +############################################################################### +# Plotting Methods +############################################################################### - # Reference axes to be used - axes = pltaxes - if len(cpus) > 1: - axes = pltaxes[idx] - - # Add CPU utilization - axes.set_title('{0:s}CPU [{1:d}]'.format(label1, cpu)) - df = self._trace.df_events('sched_load_avg_cpu') - df = df[df.cpu == cpu] - if len(df): - df[['util_avg']].plot(ax=axes, drawstyle='steps-post', - alpha=0.4) - - # if self._trace.hasEvents('sched_boost_cpu'): - # df = self._trace.df_events('sched_boost_cpu') - # df = df[df.cpu == cpu] - # if len(df): - # df[['usage', 'boosted_usage']].plot( - # ax=axes, - # style=['m-', 'r-'], - # drawstyle='steps-post'); - - # Add Capacities data if avilable - if self._trace.hasEvents('cpu_capacity'): - df = self._trace.df_events('cpu_capacity') - df = df[df.cpu == cpu] - if len(df): - # data = df[['capacity', 'tip_capacity', 'max_capacity']] - # data.plot(ax=axes, style=['m', 'y', 'r'], - data = df[['capacity', 'tip_capacity']] - data.plot(ax=axes, style=['m', '--y'], - drawstyle='steps-post') - - # Add overutilized signal to the plot - self._trace.analysis.status.plot_overutilized(axes) - - axes.set_ylim(0, 1100) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - - if idx == 0: - axes.annotate("{}CPUs Signals".format(label1), - xy=(0, axes.get_ylim()[1]), - xytext=(-50, 25), - textcoords='offset points', fontsize=16) - # Disable x-axis timestamp for 
top-most cpus - if len(cpus) > 1 and idx < len(cpus)-1: - axes.set_xticklabels([]) - axes.set_xlabel('') - axes.grid(True) - - idx += 1 - - # Save generated plots into datadir - figname = '{}/{}cpus{}.png'.format(self._trace.plots_dir, - self._trace.plots_prefix, label2) - pl.savefig(figname, bbox_inches='tight') - - def plot_context_switch(self): + def plot_context_switch(self, filepath=None): """ Plot histogram of context switches on each CPU. """ - if not self._trace.hasEvents('sched_switch'): - self._log.warning('Events [sched_switch] not found, plot DISABLED!') - return + fig, axis = self.setup_plot(height=8) ctx_sw_df = self.df_context_switches() - ax = ctx_sw_df.plot.bar(title="Per-CPU Task Context Switches", - legend=False, - figsize=(16, 8)) - ax.grid() + ctx_sw_df.plot.bar(title="Per-CPU Task Context Switches", + legend=False, + ax=axis) + axis.grid() + + self.save_plot(fig, filepath) + return axis # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py new file mode 100644 index 000000000..fca6e0230 --- /dev/null +++ b/lisa/analysis/load_tracking.py @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (C) 2018, Arm Limited and contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+""" Scheduler load tracking analysis module """
+
+import matplotlib.pyplot as plt
+import pylab as pl
+import pandas as pd
+
+from lisa.analysis.base import AnalysisBase
+
+
+class LoadTrackingAnalysis(AnalysisBase):
+    """
+    Support for scheduler load tracking analysis
+
+    :param trace: input Trace object
+    :type trace: :class:`Trace`
+    """
+
+    name = 'load_tracking'
+
+    def __init__(self, trace):
+        super().__init__(trace)
+
+    def df_cpus(self):
+        """
+        A DataFrame containing per-CPU load tracking signals
+        """
+        try:
+            event = 'sched_load_cfs_rq'
+            self.check_events([event])
+        except RuntimeError:
+            event = 'sched_load_avg_cpu'
+            self.check_events([event])
+
+        df = self._trace.df_events(event)
+        if event == 'sched_load_cfs_rq':
+            df = df[df.path == '/']
+            # 'rbl_load' has no equivalent in the old-style events, so drop it
+            df = df.drop(columns='rbl_load')
+
+        return df
+
+    def df_tasks(self):
+        """
+        A DataFrame containing per-task load tracking signals
+        """
+        try:
+            event = 'sched_load_se'
+            self.check_events([event])
+        except RuntimeError:
+            event = 'sched_load_avg_task'
+            self.check_events([event])
+
+        df = self._trace.df_events(event)
+        if event == 'sched_load_se':
+            df = df[df.path == '(null)']
+
+        return df
+
+
+    def plot_cpus(self, filepath=None, cpus=None):
+        """
+        Plot CPU-related signals
+
+        :param cpus: list of CPUs to be plotted
+        :type cpus: list(int)
+        """
+        cpus = cpus or list(range(self._trace.cpus_count))
+        fig, axes = self.setup_plot(nrows=len(cpus), sharex=True)
+
+        cpus_df = self.df_cpus()
+
+        # Index axes by position: 'cpus' may be a subset of the system's CPUs
+        for idx, cpu in enumerate(cpus):
+            axis = axes[idx] if len(cpus) > 1 else axes
+
+            # Add CPU utilization
+            axis.set_title('CPU{}'.format(cpu))
+            df = cpus_df[cpus_df.cpu == cpu]
+
+            if len(df):
+                df[['util']].plot(ax=axis, drawstyle='steps-post', alpha=0.4)
+                df[['load']].plot(ax=axis, drawstyle='steps-post', alpha=0.4)
+
+            # Add capacities data if available
+            if self._trace.hasEvents('cpu_capacity'):
+                df = self._trace.df_events('cpu_capacity')
+                df = df[df.cpu == cpu]
+                if len(df):
+                    data = df[['capacity', 'tip_capacity']]
+                    data.plot(ax=axis, style=['m', '--y'],
+                              drawstyle='steps-post')
+
+            # Add overutilized signal to the plot
+            self._trace.analysis.status.plot_overutilized(axis)
+
+            axis.set_ylim(0, 1100)
+            axis.set_xlim(self._trace.x_min, self._trace.x_max)
+
+        self.save_plot(fig, filepath)
+        return axes
--
GitLab


From 95bea1b6e0690b14fb21d5f248ad61d1095765bf Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Wed, 14 Nov 2018 10:53:47 +0000
Subject: [PATCH 12/56] trace: Rename addEventsDeltas() to add_events_deltas()

---
 lisa/tests/kernel/scheduler/misfit.py | 2 +-
 lisa/trace.py                         | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lisa/tests/kernel/scheduler/misfit.py b/lisa/tests/kernel/scheduler/misfit.py
index 0765fa7c9..beb7ea239 100644
--- a/lisa/tests/kernel/scheduler/misfit.py
+++ b/lisa/tests/kernel/scheduler/misfit.py
@@ -257,7 +257,7 @@ class StaggeredFinishes(MisfitMigrationBase):
         :returns: A dataframe that describes the idle status (on/off) of 'cpu'
         """
         active_df = pd.DataFrame(self.trace.getCPUActiveSignal(cpu), columns=['state'])
-        self.trace.addEventsDeltas(active_df)
+        self.trace.add_events_deltas(active_df)
         return active_df
 
     def _max_idle_time(self, start, end, cpus):
diff --git a/lisa/trace.py b/lisa/trace.py
index c47ba8e4e..1fc34286c 100644
--- a/lisa/trace.py
+++ b/lisa/trace.py
@@ -600,7 +600,7 @@
             return
 
         df = self.df_events('sched_overutilized')
-        self.addEventsDeltas(df, 'len')
+        self.add_events_deltas(df, 'len')
 
         # Build a stat on trace overutilization
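+        # ('len' holds the time until the next sched_overutilized event, so
+        # summing it over the rows where overutilized == 1 gives the total
+        # time spent overutilized.)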
         self.overutilized_time = df[df.overutilized == 1].len.sum()
@@ -906,7 +906,7 @@ class Trace(Loggable):
                 np.where(freq['state'] == 1, freq['rate'], float('nan')))
         return freq
 
-    def addEventsDeltas(self, df, col_name='delta'):
+    def add_events_deltas(self, df, col_name='delta', inplace=True):
         """
         Compute the time between each event in a dataframe, and store it in a
         new column. This only really makes sense for events tracking an
@@ -935,7 +935,7 @@ class Trace(Loggable):
 
         The input dataframe is expected to have a "column" which reports
         the time delta between consecutive rows, as for example dataframes
-        generated by addEventsDeltas().
+        generated by add_events_deltas().
 
         The returned dataframe is granted to have an initial and final
         event at the specified "start" ("end") index values, which values
--
GitLab


From 52b269aa733c17888611313c3f0d44f5aa1b083e Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Wed, 14 Nov 2018 10:54:14 +0000
Subject: [PATCH 13/56] trace: Cleanup add_events_deltas()

---
 lisa/trace.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/lisa/trace.py b/lisa/trace.py
index 1fc34286c..4c503cfa2 100644
--- a/lisa/trace.py
+++ b/lisa/trace.py
@@ -908,9 +908,20 @@ class Trace(Loggable):
 
     def add_events_deltas(self, df, col_name='delta', inplace=True):
         """
-        Compute the time between each event in a dataframe, and store it in a
-        new column. This only really makes sense for events tracking an
-        on/off state (e.g. overutilized, idle)
+        Store the time between each event in a new dataframe column
+
+        :param df: The DataFrame to operate on
+        :type df: pandas.DataFrame
+
+        :param col_name: The name of the column to add
+        :type col_name: str
+
+        :param inplace: Whether to operate on the passed DataFrame, or to use
+          a copy of it
+        :type inplace: bool
+
+        This method only really makes sense for events tracking an on/off state
+        (e.g. overutilized, idle)
         """
         if df.empty:
             return df
@@ -919,14 +930,18 @@ class Trace(Loggable):
         raise RuntimeError("Column {} is already present in the dataframe".
format(col_name)) - df['start'] = df.index - df[col_name] = (df.start - df.start.shift()).fillna(0).shift(-1) - df.drop('start', axis=1, inplace=True) + if not inplace: + df = df.copy() + + time_df = pd.DataFrame(index=df.index, data=df.index.values, columns=["start"]) + df[col_name] = (time_df.start - time_df.start.shift()).fillna(0).shift(-1) # Fix the last event, which will have a NaN duration # Set duration to trace_end - last_event df.loc[df.index[-1], col_name] = self.start_time + self.time_range - df.index[-1] + return df + @staticmethod def squash_df(df, start, end, column='delta'): """ -- GitLab From 960f2199efc9e705049fa74f5349b23fa14012d3 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 14 Nov 2018 14:33:47 +0000 Subject: [PATCH 14/56] analysis: Rework frequency analysis --- lisa/analysis/base.py | 2 - lisa/analysis/cpus.py | 44 +++++++++++++++- lisa/analysis/frequency.py | 105 ++++++++++--------------------------- lisa/analysis/idle.py | 2 +- lisa/analysis/thermal.py | 2 +- lisa/trace.py | 44 ---------------- 6 files changed, 72 insertions(+), 127 deletions(-) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index 2bdcb407d..86b92a165 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -29,8 +29,6 @@ from trappy.utils import listify """ Helper module for Analysis classes """ -ResidencyTime = namedtuple('ResidencyTime', ['total', 'active']) -ResidencyData = namedtuple('ResidencyData', ['label', 'residency']) class AnalysisBase: """ diff --git a/lisa/analysis/cpus.py b/lisa/analysis/cpus.py index 80253615b..b5360a5d0 100644 --- a/lisa/analysis/cpus.py +++ b/lisa/analysis/cpus.py @@ -17,8 +17,9 @@ """ CPUs Analysis Module """ -import matplotlib.pyplot as plt -import pylab as pl +import operator +from functools import reduce + import pandas as pd from trappy.utils import handle_duplicate_index @@ -124,6 +125,45 @@ class CpusAnalysis(AnalysisBase): # Fix sequences of wakeup/sleep events reported with the same index return handle_duplicate_index(cpu_active) + def signal_cluster_active(self, cluster): + """ + Build a square wave representing the active (i.e. non-idle) cluster + time, i.e.: + + cluster_active[t] == 1 if at least one CPU is reported to be non-idle + by CPUFreq at time t + cluster_active[t] == 0 otherwise + + :param cluster: list of CPU IDs belonging to a cluster + :type cluster: list(int) + + :returns: A :class:`pandas.Series` or ``None`` if the trace contains no + "cpu_idle" events + """ + active = self.signal_cpu_active(cluster[0]).to_frame(name=cluster[0]) + for cpu in cluster[1:]: + active = active.join( + self.signal_cpu_active(cpu).to_frame(name=cpu), + how='outer' + ) + + active.fillna(method='ffill', inplace=True) + # There might be NaNs in the signal where we got data from some CPUs + # before others. That will break the .astype(int) below, so drop rows + # with NaN in them. 
+ active.dropna(inplace=True) + + # Cluster active is the OR between the actives on each CPU + # belonging to that specific cluster + cluster_active = reduce( + operator.or_, + [cpu_active.astype(int) for _, cpu_active in + active.items()] + ) + + return cluster_active + + ############################################################################### # Plotting Methods diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py index fb39d571b..3e1efe2e3 100644 --- a/lisa/analysis/frequency.py +++ b/lisa/analysis/frequency.py @@ -24,7 +24,7 @@ import os import pandas as pd import pylab as pl -from lisa.analysis.base import AnalysisBase, ResidencyTime, ResidencyData +from lisa.analysis.base import AnalysisBase from lisa.utils import memoized from bart.common.Utils import area_under_curve from matplotlib.ticker import FuncFormatter @@ -47,7 +47,7 @@ class FrequencyAnalysis(AnalysisBase): # DataFrame Getter Methods ############################################################################### - def df_cpu_frequency_residency(self, cpu, total=True): + def df_cpu_frequency_residency(self, cpu): """ Get per-CPU frequency residency, i.e. amount of time CPU `cpu` spent at each frequency. @@ -67,66 +67,34 @@ class FrequencyAnalysis(AnalysisBase): if not isinstance(cpu, int): raise TypeError('Input CPU parameter must be an integer') - residency = self._get_frequency_residency(cpu) - if not residency: - return None - if total: - return residency.total - return residency.active + return self._get_frequency_residency((cpu,)) - def df_cluster_frequency_residency(self, cluster, total=True): + def df_domain_frequency_residency(self, cpu): """ - Get per-Cluster frequency residency, i.e. amount of time CLUSTER - `cluster` spent at each frequency. - - :param cluster: this can be either a list of CPU IDs belonging to a - cluster or the cluster name as specified in the platform - description - :type cluster: str or list(int) + Get per-frequency-domain frequency residency, i.e. amount of time each + domain at each frequency. - :param total: if true returns the "total" time, otherwise the "active" - time is returned - :type total: bool + :param cpu: Any CPU of the domain to analyse + :type cpu: int :returns: :mod:`pandas.DataFrame` - "total" or "active" time residency at each frequency. - :raises: KeyError """ - if isinstance(cluster, str): - try: - residency = self._get_frequency_residency( - self._trace.plat_info['clusters'][cluster.lower()] - ) - except KeyError: - self._log.warning( - 'Platform descriptor has not a cluster named [%s], ' - 'plot disabled!', cluster - ) - return None - else: - residency = self._get_frequency_residency(cluster) - if not residency: - return None - if total: - return residency.total - return residency.active + domains = self._trace.plat_info['freq-domains'] + for domain in domains: + if cpu in domain: + return self._get_frequency_residency(tuple(domain)) def df_cpu_frequency_transitions(self, cpu): """ Compute number of frequency transitions of a given CPU. - Requires cpu_frequency events to be available in the trace. 
- :param cpu: a CPU ID :type cpu: int :returns: :mod:`pandas.DataFrame` - number of frequency transitions """ - if not self._trace.hasEvents('cpu_frequency'): - self._log.warn('Events [cpu_frequency] not found, ' - 'frequency data not available') - return None freq_df = self._trace.df_events('cpu_frequency') cpu_freqs = freq_df[freq_df.cpu == cpu].frequency @@ -135,16 +103,15 @@ class FrequencyAnalysis(AnalysisBase): # a cpu_frequency event is triggered that can generate a duplicate) cpu_freqs = cpu_freqs.loc[cpu_freqs.shift(-1) != cpu_freqs] transitions = cpu_freqs.value_counts() - # Convert frequencies to MHz - transitions.index = transitions.index / 1000 + transitions.name = "transitions" transitions.sort_index(inplace=True) + return pd.DataFrame(transitions) def df_cpu_frequency_transition_rate(self, cpu): """ Compute frequency transition rate of a given CPU. - Requires cpu_frequency events to be available in the trace. :param cpu: a CPU ID :type cpu: int @@ -757,50 +724,36 @@ class FrequencyAnalysis(AnalysisBase): ############################################################################### @memoized - def _get_frequency_residency(self, cluster): + def _get_frequency_residency(self, cpus): """ Get a DataFrame with per cluster frequency residency, i.e. amount of time spent at a given frequency in each cluster. - :param cluster: this can be either a single CPU ID or a list of CPU IDs - belonging to a cluster - :type cluster: int or list(int) + :param cpus: A tuple of CPU IDs + :type cpus: tuple(int) :returns: namedtuple(ResidencyTime) - tuple of total and active time dataframes """ - if not self._trace.hasEvents('cpu_frequency'): - self._log.warning('Events [cpu_frequency] not found, ' - 'frequency residency computation not possible!') - return None - if not self._trace.hasEvents('cpu_idle'): - self._log.warning('Events [cpu_idle] not found, ' - 'frequency residency computation not possible!') - return None - - _cluster = listify(cluster) - freq_df = self._trace.df_events('cpu_frequency') # Assumption: all CPUs in a cluster run at the same frequency, i.e. the # frequency is scaled per-cluster not per-CPU. Hence, we can limit the # cluster frequencies data to a single CPU. This assumption is verified # by the Trace module when parsing the trace. 
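+        # For example (illustrative numbers): with cpu_frequency events at
+        # t=0s (500 MHz) and t=2s (1000 MHz) on a trace ending at t=5s, the
+        # total residency is 2s at 500 MHz and 3s at 1000 MHz, while the
+        # active residency only counts the non-idle share of that time.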
- if len(_cluster) > 1 and not self._trace.freq_coherency: + if len(cpus) > 1 and not self._trace.freq_coherency: self._log.warning('Cluster frequency is NOT coherent,' 'cannot compute residency!') return None - cluster_freqs = freq_df[freq_df.cpu == _cluster[0]] + + cluster_freqs = freq_df[freq_df.cpu == cpus[0]] # Compute TOTAL Time - time_intervals = cluster_freqs.index[1:] - cluster_freqs.index[:-1] - total_time = pd.DataFrame({ - 'time': time_intervals, - 'frequency': [f/1000.0 for f in cluster_freqs.iloc[:-1].frequency] - }) - total_time = total_time.groupby(['frequency']).sum() + cluster_freqs = self._trace.add_events_deltas( + cluster_freqs, col_name="total_time", inplace=False) + time_df = cluster_freqs[["total_time", "frequency"]].groupby(["frequency"]).sum() # Compute ACTIVE Time - cluster_active = self._trace.getClusterActiveSignal(_cluster) + cluster_active = self._trace.analysis.cpus.signal_cluster_active(cpus) # In order to compute the active time spent at each frequency we # multiply 2 square waves: @@ -816,16 +769,14 @@ class FrequencyAnalysis(AnalysisBase): cluster_active.to_frame(name='active'), how='outer') cluster_freqs.fillna(method='ffill', inplace=True) nonidle_time = [] - for f in available_freqs: - freq_active = cluster_freqs.frequency.apply(lambda x: 1 if x == f else 0) + for freq in available_freqs: + freq_active = cluster_freqs.frequency.apply(lambda x: 1 if x == freq else 0) active_t = cluster_freqs.active * freq_active # Compute total time by integrating the square wave nonidle_time.append(self._trace.integrate_square_wave(active_t)) - active_time = pd.DataFrame({'time': nonidle_time}, - index=[f/1000.0 for f in available_freqs]) - active_time.index.name = 'frequency' - return ResidencyTime(total_time, active_time) + time_df["active_time"] = pd.DataFrame(index=available_freqs, data=nonidle_time) + return time_df def _plot_frequency_residency_abs(self, axes, residency, n_plots, is_first, is_last, xmax, title=''): diff --git a/lisa/analysis/idle.py b/lisa/analysis/idle.py index f22719c1a..66d6161cc 100644 --- a/lisa/analysis/idle.py +++ b/lisa/analysis/idle.py @@ -22,7 +22,7 @@ import matplotlib.pyplot as plt import pandas as pd import pylab as pl -from lisa.analysis.base import AnalysisBase, ResidencyTime, ResidencyData +from lisa.analysis.base import AnalysisBase from trappy.utils import listify diff --git a/lisa/analysis/thermal.py b/lisa/analysis/thermal.py index f2b6ff45c..aaab5bd59 100644 --- a/lisa/analysis/thermal.py +++ b/lisa/analysis/thermal.py @@ -27,7 +27,7 @@ import os from trappy.utils import listify from devlib.utils.misc import list_to_mask, mask_to_list -from lisa.analysis.base import AnalysisBase, ResidencyTime, ResidencyData +from lisa.analysis.base import AnalysisBase from lisa.utils import memoized from bart.common.Utils import area_under_curve from matplotlib.ticker import MaxNLocator diff --git a/lisa/trace.py b/lisa/trace.py index 4c503cfa2..7c818e2c7 100644 --- a/lisa/trace.py +++ b/lisa/trace.py @@ -827,50 +827,6 @@ class Trace(Loggable): # Fix sequences of wakeup/sleep events reported with the same index return handle_duplicate_index(cpu_active) - @memoized - def getClusterActiveSignal(self, cluster): - """ - Build a square wave representing the active (i.e. 
non-idle) cluster - time, i.e.: - - cluster_active[t] == 1 if at least one CPU is reported to be non-idle - by CPUFreq at time t - cluster_active[t] == 0 otherwise - - :param cluster: list of CPU IDs belonging to a cluster - :type cluster: list(int) - - :returns: A :class:`pandas.Series` or ``None`` if the trace contains no - "cpu_idle" events - """ - if not self.hasEvents('cpu_idle'): - self.get_logger().warning('Events [cpu_idle] not found, ' - 'cannot compute cluster active signal!') - return None - - active = self.getCPUActiveSignal(cluster[0]).to_frame(name=cluster[0]) - for cpu in cluster[1:]: - active = active.join( - self.getCPUActiveSignal(cpu).to_frame(name=cpu), - how='outer' - ) - - active.fillna(method='ffill', inplace=True) - # There might be NaNs in the signal where we got data from some CPUs - # before others. That will break the .astype(int) below, so drop rows - # with NaN in them. - active.dropna(inplace=True) - - # Cluster active is the OR between the actives on each CPU - # belonging to that specific cluster - cluster_active = reduce( - operator.or_, - [cpu_active.astype(int) for _, cpu_active in - active.items()] - ) - - return cluster_active - @memoized def getPeripheralClockEffectiveRate(self, clk_name): logger = self.get_logger() -- GitLab From 58451c2d077e59bb2c036e3dabf2c63e4e828c1b Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 14 Nov 2018 14:41:07 +0000 Subject: [PATCH 15/56] analysis: Use Loggable class --- lisa/analysis/base.py | 6 ++-- lisa/analysis/eas.py | 42 ++++++++++++------------ lisa/analysis/frequency.py | 42 ++++++++++++------------ lisa/analysis/functions.py | 2 +- lisa/analysis/idle.py | 12 +++---- lisa/analysis/latency.py | 66 +++++++++++++++++++------------------- lisa/analysis/status.py | 2 +- lisa/analysis/tasks.py | 56 ++++++++++++++++---------------- lisa/analysis/thermal.py | 6 ++-- 9 files changed, 116 insertions(+), 118 deletions(-) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index 86b92a165..689183608 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -28,9 +28,10 @@ import pylab as pl from trappy.utils import listify """ Helper module for Analysis classes """ +from lisa.utils import Loggable -class AnalysisBase: +class AnalysisBase(Loggable): """ Base class for Analysis modules. @@ -47,11 +48,8 @@ class AnalysisBase: """ def __init__(self, trace): - self._log = logging.getLogger('Analysis') self._trace = trace - plat_info = self._trace.plat_info - @classmethod def setup_plot(cls, width=16, height=4, ncols=1, nrows=1, **kwargs): """ diff --git a/lisa/analysis/eas.py b/lisa/analysis/eas.py index 675d10070..f44e6e55a 100644 --- a/lisa/analysis/eas.py +++ b/lisa/analysis/eas.py @@ -55,52 +55,52 @@ class EasAnalysis(AnalysisBase): Plot energy_diff()-related signals on time axes. 
""" if not self._trace.hasEvents('sched_energy_diff'): - self._log.warning('Event [sched_energy_diff] not found, plot DISABLED!') + self.get_logger().warning('Event [sched_energy_diff] not found, plot DISABLED!') return df = self._trace.df_events('sched_energy_diff') # Filter on 'tasks' if tasks is not None: - self._log.info('Plotting EDiff data just for task(s) [%s]', tasks) + self.get_logger().info('Plotting EDiff data just for task(s) [%s]', tasks) df = df[df['comm'].isin(tasks)] # Filter on 'usage_delta' if min_usage_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' + self.get_logger().info('Plotting EDiff data just with minimum ' 'usage_delta of [%d]', min_usage_delta) df = df[abs(df['usage_delta']) >= min_usage_delta] if max_usage_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' + self.get_logger().info('Plotting EDiff data just with maximum ' 'usage_delta of [%d]', max_usage_delta) df = df[abs(df['usage_delta']) <= max_usage_delta] # Filter on 'cap_delta' if min_cap_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' + self.get_logger().info('Plotting EDiff data just with minimum ' 'cap_delta of [%d]', min_cap_delta) df = df[abs(df['cap_delta']) >= min_cap_delta] if max_cap_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' + self.get_logger().info('Plotting EDiff data just with maximum ' 'cap_delta of [%d]', max_cap_delta) df = df[abs(df['cap_delta']) <= max_cap_delta] # Filter on 'nrg_delta' if min_nrg_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' + self.get_logger().info('Plotting EDiff data just with minimum ' 'nrg_delta of [%d]', min_nrg_delta) df = df[abs(df['nrg_delta']) >= min_nrg_delta] if max_nrg_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' + self.get_logger().info('Plotting EDiff data just with maximum ' 'nrg_delta of [%d]', max_nrg_delta) df = df[abs(df['nrg_delta']) <= max_nrg_delta] # Filter on 'nrg_diff' if min_nrg_diff is not None: - self._log.info('Plotting EDiff data just with minimum ' + self.get_logger().info('Plotting EDiff data just with minimum ' 'nrg_diff of [%d]', min_nrg_diff) df = df[abs(df['nrg_diff']) >= min_nrg_diff] if max_nrg_diff is not None: - self._log.info('Plotting EDiff data just with maximum ' + self.get_logger().info('Plotting EDiff data just with maximum ' 'nrg_diff of [%d]', max_nrg_diff) df = df[abs(df['nrg_diff']) <= max_nrg_diff] @@ -199,52 +199,52 @@ class EasAnalysis(AnalysisBase): (PxE). 
""" if not self._trace.hasEvents('sched_energy_diff'): - self._log.warning('Event [sched_energy_diff] not found, plot DISABLED!') + self.get_logger().warning('Event [sched_energy_diff] not found, plot DISABLED!') return df = self._trace.df_events('sched_energy_diff') # Filter on 'tasks' if tasks is not None: - self._log.info('Plotting EDiff data just for task(s) [%s]', tasks) + self.get_logger().info('Plotting EDiff data just for task(s) [%s]', tasks) df = df[df['comm'].isin(tasks)] # Filter on 'usage_delta' if min_usage_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' + self.get_logger().info('Plotting EDiff data just with minimum ' 'usage_delta of [%d]', min_usage_delta) df = df[abs(df['usage_delta']) >= min_usage_delta] if max_usage_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' + self.get_logger().info('Plotting EDiff data just with maximum ' 'usage_delta of [%d]', max_usage_delta) df = df[abs(df['usage_delta']) <= max_usage_delta] # Filter on 'cap_delta' if min_cap_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' + self.get_logger().info('Plotting EDiff data just with minimum ' 'cap_delta of [%d]', min_cap_delta) df = df[abs(df['cap_delta']) >= min_cap_delta] if max_cap_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' + self.get_logger().info('Plotting EDiff data just with maximum ' 'cap_delta of [%d]', max_cap_delta) df = df[abs(df['cap_delta']) <= max_cap_delta] # Filter on 'nrg_delta' if min_nrg_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' + self.get_logger().info('Plotting EDiff data just with minimum ' 'nrg_delta of [%d]', min_nrg_delta) df = df[abs(df['nrg_delta']) >= min_nrg_delta] if max_nrg_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' + self.get_logger().info('Plotting EDiff data just with maximum ' 'nrg_delta of [%d]', max_nrg_delta) df = df[abs(df['nrg_delta']) <= max_nrg_delta] # Filter on 'nrg_diff' if min_nrg_diff is not None: - self._log.info('Plotting EDiff data just with minimum ' + self.get_logger().info('Plotting EDiff data just with minimum ' 'nrg_diff of [%d]', min_nrg_diff) df = df[abs(df['nrg_diff']) >= min_nrg_diff] if max_nrg_diff is not None: - self._log.info('Plotting EDiff data just with maximum ' + self.get_logger().info('Plotting EDiff data just with maximum ' 'nrg_diff of [%d]', max_nrg_diff) df = df[abs(df['nrg_diff']) <= max_nrg_diff] @@ -366,7 +366,7 @@ class EasAnalysis(AnalysisBase): Plot the configuration of SchedTune. 
""" if not self._trace.hasEvents('sched_tune_config'): - self._log.warning('Event [sched_tune_config] not found, plot DISABLED!') + self.get_logger().warning('Event [sched_tune_config] not found, plot DISABLED!') return # Grid gs = gridspec.GridSpec(2, 1, height_ratios=[4, 1]) diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py index 3e1efe2e3..01273e7fc 100644 --- a/lisa/analysis/frequency.py +++ b/lisa/analysis/frequency.py @@ -145,7 +145,7 @@ class FrequencyAnalysis(AnalysisBase): """ freq = self._trace.getPeripheralClockEffectiveRate(clk) if freq is None or freq.empty: - self._log.warning('no peripheral clock events found for clock') + self.get_logger().warning('no peripheral clock events found for clock') return fig = plt.figure(figsize=(16,8)) @@ -165,7 +165,7 @@ class FrequencyAnalysis(AnalysisBase): set_rate.plot(style=['b--'], ax=freq_axis, drawstyle='steps-post', alpha=0.4, label="clock_set_rate value") freq_axis.hlines(set_rate.iloc[-1], set_rate.index[-1], self._trace.x_max, linestyle='--', color='b', alpha=0.4) else: - self._log.warning('No clock_set_rate events to plot') + self.get_logger().warning('No clock_set_rate events to plot') # Plot frequency information (effective rate) eff_rate = freq['effective_rate'].dropna() @@ -174,7 +174,7 @@ class FrequencyAnalysis(AnalysisBase): eff_rate.plot(style=['b-'], ax=freq_axis, drawstyle='steps-post', alpha=1.0, label="Effective rate (with on/off)") freq_axis.hlines(eff_rate.iloc[-1], eff_rate.index[-1], self._trace.x_max, linestyle='-', color='b', alpha=1.0) else: - self._log.warning('No effective frequency events to plot') + self.get_logger().warning('No effective frequency events to plot') freq_axis.set_ylim(0, rate_axis_lib * 1.1) freq_axis.set_xlim(self._trace.x_min, self._trace.x_max) @@ -220,7 +220,7 @@ class FrequencyAnalysis(AnalysisBase): :type title: str """ if not self._trace.hasEvents('cpu_frequency'): - self._log.warning('Events [cpu_frequency] not found, plot DISABLED!') + self.get_logger().warning('Events [cpu_frequency] not found, plot DISABLED!') return df = self._trace.df_events('cpu_frequency') @@ -276,7 +276,7 @@ class FrequencyAnalysis(AnalysisBase): bfreq['frequency'].plot(style=['r-'], ax=axes, drawstyle='steps-post', alpha=0.4) else: - self._log.warning('NO big CPUs frequency events to plot') + self.get_logger().warning('NO big CPUs frequency events to plot') axes.set_xlim(self._trace.x_min, self._trace.x_max) axes.set_ylabel('MHz') axes.grid(True) @@ -296,7 +296,7 @@ class FrequencyAnalysis(AnalysisBase): lfreq['frequency'].plot(style=['b-'], ax=axes, drawstyle='steps-post', alpha=0.4) else: - self._log.warning('NO LITTLE CPUs frequency events to plot') + self.get_logger().warning('NO LITTLE CPUs frequency events to plot') axes.set_xlim(self._trace.x_min, self._trace.x_max) axes.set_ylabel('MHz') axes.grid(True) @@ -307,9 +307,9 @@ class FrequencyAnalysis(AnalysisBase): .format(self._trace.plots_dir, self._trace.plots_prefix) pl.savefig(figname, bbox_inches='tight') - self._log.info('LITTLE cluster average frequency: %.3f GHz', + self.get_logger().info('LITTLE cluster average frequency: %.3f GHz', avg_lfreq/1e3) - self._log.info('big cluster average frequency: %.3f GHz', + self.get_logger().info('big cluster average frequency: %.3f GHz', avg_bfreq/1e3) return (avg_lfreq/1e3, avg_bfreq/1e3) @@ -330,7 +330,7 @@ class FrequencyAnalysis(AnalysisBase): :return: a dictionary of average frequency for each CPU. 
""" if not self._trace.hasEvents('cpu_frequency'): - self._log.warning('Events [cpu_frequency] not found, plot DISABLED!') + self.get_logger().warning('Events [cpu_frequency] not found, plot DISABLED!') return df = self._trace.df_events('cpu_frequency') @@ -349,7 +349,7 @@ class FrequencyAnalysis(AnalysisBase): # Extract CPUs' frequencies and scale them to [MHz] _df = df[df.cpu == cpu_id] if _df.empty: - self._log.warning('No [cpu_frequency] events for CPU%d, ' + self.get_logger().warning('No [cpu_frequency] events for CPU%d, ' 'plot DISABLED!', cpu_id) continue _df['frequency'] = _df.frequency / 1e3 @@ -419,7 +419,7 @@ class FrequencyAnalysis(AnalysisBase): axes.set_xlabel('') avg_freqs[cpu_id] = _avg/1e3 - self._log.info('CPU%02d average frequency: %.3f GHz', + self.get_logger().info('CPU%02d average frequency: %.3f GHz', cpu_id, avg_freqs[cpu_id]) # Save generated plots into datadir @@ -450,10 +450,10 @@ class FrequencyAnalysis(AnalysisBase): :type active: bool """ if not self._trace.hasEvents('cpu_frequency'): - self._log.warning('Events [cpu_frequency] not found, plot DISABLED!') + self.get_logger().warning('Events [cpu_frequency] not found, plot DISABLED!') return if not self._trace.hasEvents('cpu_idle'): - self._log.warning('Events [cpu_idle] not found, plot DISABLED!') + self.get_logger().warning('Events [cpu_idle] not found, plot DISABLED!') return if cpus is None: @@ -505,20 +505,20 @@ class FrequencyAnalysis(AnalysisBase): :type active: bool """ if not self._trace.hasEvents('cpu_frequency'): - self._log.warning('Events [cpu_frequency] not found, plot DISABLED!') + self.get_logger().warning('Events [cpu_frequency] not found, plot DISABLED!') return if not self._trace.hasEvents('cpu_idle'): - self._log.warning('Events [cpu_idle] not found, plot DISABLED!') + self.get_logger().warning('Events [cpu_idle] not found, plot DISABLED!') return if 'clusters' not in self._trace.plat_info: - self._log.warning('No platform cluster info. Plot DISABLED!') + self.get_logger().warning('No platform cluster info. Plot DISABLED!') return # Assumption: all CPUs in a cluster run at the same frequency, i.e. the # frequency is scaled per-cluster not per-CPU. Hence, we can limit the # cluster frequencies data to a single CPU if not self._trace.freq_coherency: - self._log.warning('Cluster frequency is not coherent, plot DISABLED!') + self.get_logger().warning('Cluster frequency is not coherent, plot DISABLED!') return # Sanitize clusters @@ -556,7 +556,7 @@ class FrequencyAnalysis(AnalysisBase): :type pct: bool """ if not self._trace.hasEvents('cpu_frequency'): - self._log.warn('Events [cpu_frequency] not found, plot DISABLED!') + self.get_logger().warn('Events [cpu_frequency] not found, plot DISABLED!') return df = self._trace.df_events('cpu_frequency') @@ -644,11 +644,11 @@ class FrequencyAnalysis(AnalysisBase): :type pct: bool """ if not self._trace.hasEvents('cpu_frequency'): - self._log.warn('Events [cpu_frequency] not found, plot DISABLED!') + self.get_logger().warn('Events [cpu_frequency] not found, plot DISABLED!') return if not self._trace.plat_info or 'clusters' not in self._trace.plat_info: - self._log.warn('No platform cluster info, plot DISABLED!') + self.get_logger().warn('No platform cluster info, plot DISABLED!') return if clusters is None: @@ -741,7 +741,7 @@ class FrequencyAnalysis(AnalysisBase): # cluster frequencies data to a single CPU. This assumption is verified # by the Trace module when parsing the trace. 
if len(cpus) > 1 and not self._trace.freq_coherency: - self._log.warning('Cluster frequency is NOT coherent,' + self.get_logger().warning('Cluster frequency is NOT coherent,' 'cannot compute residency!') return None diff --git a/lisa/analysis/functions.py b/lisa/analysis/functions.py index 5169416d9..64ecb3e9a 100644 --- a/lisa/analysis/functions.py +++ b/lisa/analysis/functions.py @@ -52,7 +52,7 @@ class FunctionsAnalysis(AnalysisBase): :type metrics: srt or list(str) """ if not hasattr(self._trace, '_functions_stats_df'): - self._log.warning('Functions stats data not available') + self.get_logger().warning('Functions stats data not available') return metrics = listify(metrics) diff --git a/lisa/analysis/idle.py b/lisa/analysis/idle.py index 66d6161cc..751cbc688 100644 --- a/lisa/analysis/idle.py +++ b/lisa/analysis/idle.py @@ -53,7 +53,7 @@ class IdleAnalysis(AnalysisBase): :returns: :mod:`pandas.DataFrame` - idle state residency dataframe """ if not self._trace.hasEvents('cpu_idle'): - self._log.warning('Events [cpu_idle] not found, ' + self.get_logger().warning('Events [cpu_idle] not found, ' 'idle state residency computation not possible!') return None @@ -103,7 +103,7 @@ class IdleAnalysis(AnalysisBase): :returns: :mod:`pandas.DataFrame` - idle state residency dataframe """ if not self._trace.hasEvents('cpu_idle'): - self._log.warning('Events [cpu_idle] not found, ' + self.get_logger().warning('Events [cpu_idle] not found, ' 'idle state residency computation not possible!') return None @@ -112,7 +112,7 @@ class IdleAnalysis(AnalysisBase): try: _cluster = self._trace.plat_info['clusters'][cluster.lower()] except KeyError: - self._log.warning('%s cluster not found!', cluster) + self.get_logger().warning('%s cluster not found!', cluster) return None idle_df = self._trace.df_events('cpu_idle') @@ -179,7 +179,7 @@ class IdleAnalysis(AnalysisBase): :type pct: bool """ if not self._trace.hasEvents('cpu_idle'): - self._log.warning('Events [cpu_idle] not found, ' + self.get_logger().warning('Events [cpu_idle] not found, ' 'plot DISABLED!') return @@ -220,10 +220,10 @@ class IdleAnalysis(AnalysisBase): :type clusters: str ot list(str) """ if not self._trace.hasEvents('cpu_idle'): - self._log.warning('Events [cpu_idle] not found, plot DISABLED!') + self.get_logger().warning('Events [cpu_idle] not found, plot DISABLED!') return if 'clusters' not in self._trace.plat_info: - self._log.warning('No platform cluster info. Plot DISABLED!') + self.get_logger().warning('No platform cluster info. 
Plot DISABLED!') return # Sanitize clusters diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py index 29aef7b28..7032b8111 100644 --- a/lisa/analysis/latency.py +++ b/lisa/analysis/latency.py @@ -78,11 +78,11 @@ class LatencyAnalysis(AnalysisBase): """ if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Events [sched_wakeup] not found, ' + self.get_logger().warning('Events [sched_wakeup] not found, ' 'cannot compute CPU active signal!') return None if not self._trace.hasEvents('sched_switch'): - self._log.warning('Events [sched_switch] not found, ' + self.get_logger().warning('Events [sched_switch] not found, ' 'cannot compute CPU active signal!') return None @@ -129,15 +129,15 @@ class LatencyAnalysis(AnalysisBase): numbers = 0 for value in task_switches_df.curr_state.unique(): if type(value) is not str: - self._log.warning('The [sched_switch] events contain "prev_state" value [%s]', + self.get_logger().warning('The [sched_switch] events contain "prev_state" value [%s]', value) numbers += 1 if numbers: verb = 'is' if numbers == 1 else 'are' - self._log.warning(' which %s not currently mapped into a task state.', + self.get_logger().warning(' which %s not currently mapped into a task state.', verb) - self._log.warning('Check mappings in:') - self._log.warning(' %s::%s _task_state()', + self.get_logger().warning('Check mappings in:') + self.get_logger().warning(' %s::%s _task_state()', __file__, self.__class__.__name__) # Forward annotate task state @@ -276,7 +276,7 @@ class LatencyAnalysis(AnalysisBase): return cr.runtime if row['next_state'] in ['n']: return cr.runtime - self._log.warning("Unexpected next state: %s @ %f", + self.get_logger().warning("Unexpected next state: %s @ %f", row['next_state'], row['t_start']) return 0 # cr's static variables intialization @@ -345,11 +345,11 @@ class LatencyAnalysis(AnalysisBase): """ if not self._trace.hasEvents('sched_switch'): - self._log.warning('Event [sched_switch] not found, ' + self.get_logger().warning('Event [sched_switch] not found, ' 'plot DISABLED!') return if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Event [sched_wakeup] not found, ' + self.get_logger().warning('Event [sched_wakeup] not found, ' 'plot DISABLED!') return @@ -364,7 +364,7 @@ class LatencyAnalysis(AnalysisBase): wkp_df = self.df_latency_wakeup(td.pid) if wkp_df is not None: wkp_df.rename(columns={'wakeup_latency' : 'latency'}, inplace=True) - self._log.info('Found: %5d WAKEUP latencies', len(wkp_df)) + self.get_logger().info('Found: %5d WAKEUP latencies', len(wkp_df)) # Load preempt latencies (if required) prt_df = None @@ -372,10 +372,10 @@ class LatencyAnalysis(AnalysisBase): prt_df = self.df_latency_preemption(td.pid) if prt_df is not None: prt_df.rename(columns={'preempt_latency' : 'latency'}, inplace=True) - self._log.info('Found: %5d PREEMPT latencies', len(prt_df)) + self.get_logger().info('Found: %5d PREEMPT latencies', len(prt_df)) if wkp_df is None and prt_df is None: - self._log.warning('No Latency info for task [%s]', td.label) + self.get_logger().warning('No Latency info for task [%s]', td.label) return # Join the two data frames @@ -447,8 +447,8 @@ class LatencyAnalysis(AnalysisBase): # Get latency events df, cdf = self._get_latency_df(task, kind, threshold_ms) - self._log.info('Total: %5d latency events', len(df)) - self._log.info('%.1f %% samples below %d [ms] threshold', + self.get_logger().info('Total: %5d latency events', len(df)) + self.get_logger().info('%.1f %% samples below %d [ms] threshold', 100. 
* cdf.below, threshold_ms) # Get task data @@ -525,11 +525,11 @@ class LatencyAnalysis(AnalysisBase): :type axes: :mod:`matplotlib.axes.Axes` """ if not self._trace.hasEvents('sched_switch'): - self._log.warning('Event [sched_switch] not found, ' + self.get_logger().warning('Event [sched_switch] not found, ' 'plot DISABLED!') return if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Event [sched_wakeup] not found, ' + self.get_logger().warning('Event [sched_wakeup] not found, ' 'plot DISABLED!') return @@ -542,7 +542,7 @@ class LatencyAnalysis(AnalysisBase): prt_df = self.df_latency_preemption(td.pid) if wkl_df is None and prt_df is None: - self._log.warning('No task with name [%s]', td.label) + self.get_logger().warning('No task with name [%s]', td.label) return # If not axis provided: generate a standalone plot @@ -621,11 +621,11 @@ class LatencyAnalysis(AnalysisBase): """ if not self._trace.hasEvents('sched_switch'): - self._log.warning('Event [sched_switch] not found, ' + self.get_logger().warning('Event [sched_switch] not found, ' 'plot DISABLED!') return if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Event [sched_wakeup] not found, ' + self.get_logger().warning('Event [sched_wakeup] not found, ' 'plot DISABLED!') return @@ -638,7 +638,7 @@ class LatencyAnalysis(AnalysisBase): wkp_df = self.df_activations(td.pid) if wkp_df is None: return None - self._log.info('Found: %5d activations for [%s]', + self.get_logger().info('Found: %5d activations for [%s]', len(wkp_df), td.label) # Disregard data above two time the specified threshold @@ -649,14 +649,14 @@ class LatencyAnalysis(AnalysisBase): if len_plt < len_tot: len_dif = len_tot - len_plt len_pct = 100. * len_dif / len_tot - self._log.warning('Discarding {} activation intervals (above 2 x threshold_ms, ' + self.get_logger().warning('Discarding {} activation intervals (above 2 x threshold_ms, ' '{:.1f}% of the overall activations)'\ .format(len_dif, len_pct)) ymax = 1.1 * wkp_df.activation_interval.max() # Build the series for the CDF cdf = self._get_cdf(wkp_df.activation_interval, (threshold_ms / 1000.)) - self._log.info('%.1f %% samples below %d [ms] threshold', + self.get_logger().info('%.1f %% samples below %d [ms] threshold', 100. * cdf.below, threshold_ms) # Setup plots @@ -752,11 +752,11 @@ class LatencyAnalysis(AnalysisBase): """ if not self._trace.hasEvents('sched_switch'): - self._log.warning('Event [sched_switch] not found, ' + self.get_logger().warning('Event [sched_switch] not found, ' 'plot DISABLED!') return if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Event [sched_wakeup] not found, ' + self.get_logger().warning('Event [sched_wakeup] not found, ' 'plot DISABLED!') return @@ -769,7 +769,7 @@ class LatencyAnalysis(AnalysisBase): run_df = self.df_runtimes(td.pid) if run_df is None: return None - self._log.info('Found: %5d activations for [%s]', + self.get_logger().info('Found: %5d activations for [%s]', len(run_df), td.label) # Disregard data above two time the specified threshold @@ -780,14 +780,14 @@ class LatencyAnalysis(AnalysisBase): if len_plt < len_tot: len_dif = len_tot - len_plt len_pct = 100. 
* len_dif / len_tot - self._log.warning('Discarding {} running times (above 2 x threshold_ms, ' + self.get_logger().warning('Discarding {} running times (above 2 x threshold_ms, ' '{:.1f}% of the overall activations)'\ .format(len_dif, len_pct)) ymax = 1.1 * run_df.running_time.max() # Build the series for the CDF cdf = self._get_cdf(run_df.running_time, (threshold_ms / 1000.)) - self._log.info('%.1f %% samples below %d [ms] threshold', + self.get_logger().info('%.1f %% samples below %d [ms] threshold', 100. * cdf.below, threshold_ms) # Setup plots @@ -873,16 +873,16 @@ class LatencyAnalysis(AnalysisBase): if isinstance(task, str): task_pids = self._trace.getTaskByName(task) if len(task_pids) == 0: - self._log.warning('No tasks found with name [%s]', task) + self.get_logger().warning('No tasks found with name [%s]', task) return None task_pid = task_pids[0] if len(task_pids) > 1: - self._log.warning('Multiple PIDs for task named [%s]', task) + self.get_logger().warning('Multiple PIDs for task named [%s]', task) for pid in task_pids: - self._log.warning(' %5d : %s', pid, + self.get_logger().warning(' %5d : %s', pid, ','.join(self._trace.getTaskByPid(pid))) - self._log.warning('Returning stats only for PID: %d', + self.get_logger().warning('Returning stats only for PID: %d', task_pid) task_name = self._trace.getTaskByPid(task_pid) @@ -891,7 +891,7 @@ class LatencyAnalysis(AnalysisBase): task_pid = task task_name = self._trace.getTaskByPid(task_pid) if task_name is None: - self._log.warning('No tasks found with name [%s]', task) + self.get_logger().warning('No tasks found with name [%s]', task) return None else: @@ -926,7 +926,7 @@ class LatencyAnalysis(AnalysisBase): try: kernel_version = self._trace.plat_info['kernel-version'] except KeyError: - self._log.info('Parsing task states assuming 3.18 kernel') + self.get_logger().info('Parsing task states assuming 3.18 kernel') kernel_version = KernelVersion('3.18') if kernel_version.parts >= (4, 8): diff --git a/lisa/analysis/status.py b/lisa/analysis/status.py index 3257ee396..95a2d594c 100644 --- a/lisa/analysis/status.py +++ b/lisa/analysis/status.py @@ -81,7 +81,7 @@ class StatusAnalysis(AnalysisBase): :type axes: :mod:`matplotlib.axes.Axes` """ if not self._trace.hasEvents('sched_overutilized'): - self._log.warning('Event [sched_overutilized] not found, ' + self.get_logger().warning('Event [sched_overutilized] not found, ' 'plot DISABLED!') return diff --git a/lisa/analysis/tasks.py b/lisa/analysis/tasks.py index 3a7cd79d4..1efd3d0a9 100644 --- a/lisa/analysis/tasks.py +++ b/lisa/analysis/tasks.py @@ -60,7 +60,7 @@ class TasksAnalysis(AnalysisBase): :type min_utilization: int """ if self.df_load() is None: - self._log.warning('No trace events for task signals, plot DISABLED') + self.get_logger().warning('No trace events for task signals, plot DISABLED') return None if min_utilization is None: @@ -70,13 +70,13 @@ class TasksAnalysis(AnalysisBase): df = self.df_load() big_tasks_events = df[df.util_avg > min_utilization] if not len(big_tasks_events): - self._log.warning('No tasks with with utilization samples > %d', + self.get_logger().warning('No tasks with with utilization samples > %d', min_utilization) return None # Report the number of tasks which match the min_utilization condition big_tasks = big_tasks_events.pid.unique() - self._log.info('%5d tasks with samples of utilization > %d', + self.get_logger().info('%5d tasks with samples of utilization > %d', len(big_tasks), min_utilization) # Compute number of samples above threshold @@ 
-91,11 +91,11 @@ class TasksAnalysis(AnalysisBase): # Filter for number of occurrences big_tasks_stats = big_tasks_stats[big_tasks_stats['count'] > min_samples] if not len(big_tasks_stats): - self._log.warning(' but none with more than %d samples', + self.get_logger().warning(' but none with more than %d samples', min_samples) return None - self._log.info(' %d with more than %d samples', + self.get_logger().info(' %d with more than %d samples', len(big_tasks_stats), min_samples) # Add task name column @@ -116,7 +116,7 @@ class TasksAnalysis(AnalysisBase): :type min_wakeups: int """ if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Events [sched_wakeup] not found') + self.get_logger().warning('Events [sched_wakeup] not found') return None df = self._trace.df_events('sched_wakeup') @@ -130,10 +130,10 @@ class TasksAnalysis(AnalysisBase): wkp_tasks_stats = wkp_tasks_stats[ wkp_tasks_stats['count'] > min_wakeups] if not len(df): - self._log.warning('No tasks with more than %d wakeups', + self.get_logger().warning('No tasks with more than %d wakeups', len(wkp_tasks_stats)) return None - self._log.info('%5d tasks with more than %d wakeups', + self.get_logger().info('%5d tasks with more than %d wakeups', len(df), len(wkp_tasks_stats)) # Add task name column @@ -159,7 +159,7 @@ class TasksAnalysis(AnalysisBase): :type min_prio: int """ if not self._trace.hasEvents('sched_switch'): - self._log.warning('Events [sched_switch] not found') + self.get_logger().warning('Events [sched_switch] not found') return None df = self._trace.df_events('sched_switch') @@ -274,7 +274,7 @@ class TasksAnalysis(AnalysisBase): # Check for the minimum required signals to be available if self.df_load() is None: - self._log.warning('No trace events for task signals, plot DISABLED') + self.get_logger().warning('No trace events for task signals, plot DISABLED') return # Defined list of tasks to plot @@ -326,7 +326,7 @@ class TasksAnalysis(AnalysisBase): savefig = False task_name = self._trace.getTaskByPid(tid) - self._log.info('Plotting [%d:%s]...', tid, task_name) + self.get_logger().info('Plotting [%d:%s]...', tid, task_name) plot_id = 0 # For each task create a figure with plots_count plots @@ -351,7 +351,7 @@ class TasksAnalysis(AnalysisBase): signals_to_plot = list(signals_to_plot.intersection(signals)) if len(signals_to_plot) > 0: if not self._trace.has_big_little: - self._log.warning( + self.get_logger().warning( 'No big.LITTLE platform data, residencies plot disabled') else: axes = plt.subplot(gs[plot_id, 0]) @@ -380,7 +380,7 @@ class TasksAnalysis(AnalysisBase): savefig = True if not savefig: - self._log.warning('Nothing to plot for %s', task_name) + self.get_logger().warning('Nothing to plot for %s', task_name) continue # Save generated plots into datadir @@ -423,7 +423,7 @@ class TasksAnalysis(AnalysisBase): big_frequent_tasks_count = len(big_frequent_task_pids) if big_frequent_tasks_count == 0: - self._log.warning('No big/frequent tasks to plot') + self.get_logger().warning('No big/frequent tasks to plot') return # Get the list of events for all big frequent tasks @@ -465,7 +465,7 @@ class TasksAnalysis(AnalysisBase): ax.set_xlabel('Time [s]') - self._log.info('Tasks which have been a "utilization" of %d for at least %d samples', + self.get_logger().info('Tasks which have been a "utilization" of %d for at least %d samples', self._little_cap, min_samples) def plot_wakeup(self, max_tasks=10, min_wakeups=0, per_cluster=False): @@ -484,12 +484,12 @@ class TasksAnalysis(AnalysisBase): """ if 
per_cluster is True and \ not self._trace.hasEvents('sched_wakeup_new'): - self._log.warning('Events [sched_wakeup_new] not found, ' + self.get_logger().warning('Events [sched_wakeup_new] not found, ' 'plots DISABLED!') return elif not self._trace.hasEvents('sched_wakeup') and \ not self._trace.hasEvents('sched_wakeup_new'): - self._log.warning('Events [sched_wakeup, sched_wakeup_new] not found, ' + self.get_logger().warning('Events [sched_wakeup, sched_wakeup_new] not found, ' 'plots DISABLED!') return @@ -508,10 +508,10 @@ class TasksAnalysis(AnalysisBase): ntlc = df[little_frequent]; ntlc_count = len(ntlc) - self._log.info('%5d tasks forked on big cluster (%3.1f %%)', + self.get_logger().info('%5d tasks forked on big cluster (%3.1f %%)', ntbc_count, 100. * ntbc_count / (ntbc_count + ntlc_count)) - self._log.info('%5d tasks forked on LITTLE cluster (%3.1f %%)', + self.get_logger().info('%5d tasks forked on LITTLE cluster (%3.1f %%)', ntlc_count, 100. * ntlc_count / (ntbc_count + ntlc_count)) @@ -537,7 +537,7 @@ class TasksAnalysis(AnalysisBase): wkp_task_pids = self.df_top_wakeup(min_wakeups) if len(wkp_task_pids): wkp_task_pids = wkp_task_pids.index.values[:max_tasks] - self._log.info('Plotting %d frequent wakeup tasks', + self.get_logger().info('Plotting %d frequent wakeup tasks', len(wkp_task_pids)) ax = axes[0] @@ -581,13 +581,13 @@ class TasksAnalysis(AnalysisBase): """ if not self._trace.hasEvents('cpu_frequency'): - self._log.warning('Events [cpu_frequency] not found') + self.get_logger().warning('Events [cpu_frequency] not found') return # Get all utilization update events df = self.df_load() if df is None: - self._log.warning('No trace events for task signals, plot DISABLED') + self.get_logger().warning('No trace events for task signals, plot DISABLED') return if big_cluster: @@ -604,7 +604,7 @@ class TasksAnalysis(AnalysisBase): big_task_pids = big_task_pids.index.values df = df[df.pid.isin(big_task_pids)] if not df.size: - self._log.warning('No events for tasks with more then %d utilization ' + self.get_logger().warning('No events for tasks with more then %d utilization ' 'samples bigger than %d, plots DISABLED!') return @@ -681,7 +681,7 @@ class TasksAnalysis(AnalysisBase): # Get dataframe for the required task util_df = self.df_load() if util_df is None: - self._log.warning('No trace events for task signals, plot DISABLED') + self.get_logger().warning('No trace events for task signals, plot DISABLED') return # Plot load and util @@ -701,7 +701,7 @@ class TasksAnalysis(AnalysisBase): data.plot(ax=axes, style=['y-'], drawstyle='steps-post') else: task_name = self._trace.getTaskByPid(tid) - self._log.warning('No "boosted_util" data for task [%d:%s]', + self.get_logger().warning('No "boosted_util" data for task [%d:%s]', tid, task_name) # Add Capacities data if avilable @@ -711,7 +711,7 @@ class TasksAnalysis(AnalysisBase): max_bcap = nrg_model['big']['cpu']['cap_max'] tip_lcap = 0.8 * max_lcap tip_bcap = 0.8 * max_bcap - self._log.debug( + self.get_logger().debug( 'LITTLE capacity tip/max: %d/%d, big capacity tip/max: %d/%d', tip_lcap, max_lcap, tip_bcap, max_bcap ) @@ -747,7 +747,7 @@ class TasksAnalysis(AnalysisBase): """ util_df = self.df_load() if util_df is None: - self._log.warning('No trace events for task signals, plot DISABLED') + self.get_logger().warning('No trace events for task signals, plot DISABLED') return data = util_df[util_df.pid == tid][['cluster', 'cpu']] for ccolor, clabel in zip('gr', ['LITTLE', 'big']): @@ -784,7 +784,7 @@ class 
TasksAnalysis(AnalysisBase): :param signals: list(str) """ if not self._trace.hasEvents('sched_load_avg_task'): - self._log.warning( + self.get_logger().warning( 'No sched_load_avg_task events, skipping PELT plot') return diff --git a/lisa/analysis/thermal.py b/lisa/analysis/thermal.py index aaab5bd59..bc72f0832 100644 --- a/lisa/analysis/thermal.py +++ b/lisa/analysis/thermal.py @@ -148,7 +148,7 @@ class ThermalAnalysis(AnalysisBase): :type thermal_zones: list(int) """ if not self._trace.hasEvents('thermal_temperature'): - self._log.warning('Event [{}] not found, plot DISABLED!' + self.get_logger().warning('Event [{}] not found, plot DISABLED!' .format('thermal_temperature')) return @@ -191,7 +191,7 @@ class ThermalAnalysis(AnalysisBase): :type cpus: list(int) """ if not self._trace.hasEvents('thermal_power_cpu_limit'): - self._log.warning('Event [{}] not found, plot DISABLED!' + self.get_logger().warning('Event [{}] not found, plot DISABLED!' .format('thermal_power_cpu_limit')) return @@ -244,7 +244,7 @@ class ThermalAnalysis(AnalysisBase): :type cpus: list(int) """ if not self._trace.hasEvents('thermal_power_devfreq_limit'): - self._log.warning('Event [{}] not found, plot DISABLED!' + self.get_logger().warning('Event [{}] not found, plot DISABLED!' .format('thermal_power_devfreq_limit')) return -- GitLab From d443e612db2165f11adc35382060e80cfd0cc149 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 14 Nov 2018 14:48:25 +0000 Subject: [PATCH 16/56] analysis: Introduce requires_events() decorator --- lisa/analysis/base.py | 31 +++++++++++++++++++++++++++---- lisa/analysis/cpus.py | 15 ++++++++------- lisa/analysis/frequency.py | 5 ++++- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index 689183608..cd24a58f4 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -17,6 +17,7 @@ import logging from collections import namedtuple +import functools import os import inspect @@ -30,6 +31,28 @@ from trappy.utils import listify """ Helper module for Analysis classes """ from lisa.utils import Loggable +def requires_events(events): + """ + Decorator for methods that require some given trace events + + :param events: The list of required events + :type events: list(str) + + The decorated method's class must inherit from :class:`AnalysisBase` + """ + def decorator(f): + @functools.wraps(f) + + def wrapper(self, *args, **kwargs): + self.check_events(events) + return f(self, *args, **kwargs) + + # Set an attribute on the wrapper itself, so it can be e.g. added + # to the method documentation + wrapper.required_events = events + return wrapper + + return decorator class AnalysisBase(Loggable): """ @@ -40,8 +63,8 @@ class AnalysisBase(Loggable): :Design notes: - Method depending on certain trace events *must* start with a call to - :meth:`AnalysisBase.check_events`. + Methods depending on certain trace events *must* be decorated with + :meth:`lisa.analysis.base.requires_events`. Plotting methods *must* return the :class:`matplotlib.axes.Axes` instance used by the plotting method. This lets users embed plots into subplots.
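As a minimal usage sketch of the decorator (the analysis class and event name below are illustrative, not part of this patch):

    from lisa.analysis.base import AnalysisBase, requires_events

    class ExampleAnalysis(AnalysisBase):
        @requires_events(['sched_switch'])
        def df_switches(self):
            # By the time this body runs, check_events() has already raised
            # RuntimeError if the trace lacks sched_switch.
            return self._trace.df_events('sched_switch')

The wrapper assumes the decorated callable is a method whose ``self`` provides ``check_events()``, i.e. an :class:`AnalysisBase` instance.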
@@ -109,8 +132,8 @@ class AnalysisBase(Loggable): :raises: RuntimeError if some events are not available """ - available_events = set(self._trace.events) - missing_events = set(required_events).difference(available_events) + available_events = sorted(set(self._trace.events)) + missing_events = sorted(set(required_events).difference(available_events)) if missing_events: raise RuntimeError( diff --git a/lisa/analysis/cpus.py b/lisa/analysis/cpus.py index b5360a5d0..57b36b7f3 100644 --- a/lisa/analysis/cpus.py +++ b/lisa/analysis/cpus.py @@ -24,7 +24,8 @@ import pandas as pd from trappy.utils import handle_duplicate_index -from lisa.analysis.base import AnalysisBase +from lisa.utils import memoized +from lisa.analysis.base import AnalysisBase, requires_events class CpusAnalysis(AnalysisBase): @@ -45,14 +46,13 @@ class CpusAnalysis(AnalysisBase): # DataFrame Getter Methods ############################################################################### + @requires_events(['sched_switch']) def df_context_switches(self): """ Compute number of context switches on each CPU. :returns: :mod:`pandas.DataFrame` """ - self.check_events(['sched_switch']) - sched_df = self._trace.df_events('sched_switch') cpus = list(range(self._trace.cpus_count)) ctx_sw_df = pd.DataFrame( @@ -64,6 +64,7 @@ class CpusAnalysis(AnalysisBase): return ctx_sw_df + @requires_events(['cpu_idle']) def df_cpu_wakeups(self, cpus=None): """" Get a DataFrame showing when a CPU was woken from idle @@ -75,8 +76,6 @@ class CpusAnalysis(AnalysisBase): row shows a time when the given ``cpu`` was woken up from idle. """ - self.check_events(['cpu_idle']) - cpus = cpus or list(range(self._trace.cpus_count)) sr = pd.Series() @@ -88,6 +87,8 @@ class CpusAnalysis(AnalysisBase): return pd.DataFrame({'cpu': sr}).sort_index() + @memoized + @requires_events(['cpu_idle']) def signal_cpu_active(self, cpu): """ Build a square wave representing the active (i.e. non-idle) CPU time, @@ -103,8 +104,6 @@ class CpusAnalysis(AnalysisBase): :returns: A :class:`pandas.Series` or ``None`` if the trace contains no "cpu_idle" events """ - self.check_events(['cpu_idle']) - idle_df = self._trace.df_events('cpu_idle') cpu_df = idle_df[idle_df.cpu_id == cpu] @@ -125,6 +124,7 @@ class CpusAnalysis(AnalysisBase): # Fix sequences of wakeup/sleep events reported with the same index return handle_duplicate_index(cpu_active) + @requires_events(signal_cpu_active.required_events) def signal_cluster_active(self, cluster): """ Build a square wave representing the active (i.e. non-idle) cluster @@ -169,6 +169,7 @@ class CpusAnalysis(AnalysisBase): # Plotting Methods ############################################################################### + @requires_events(df_context_switches.required_events) def plot_context_switch(self, filepath=None): """ Plot histogram of context switches on each CPU. 
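The ``required_events`` attribute set by the decorator is what allows the chaining seen above: a plotting method can reuse the requirements of the getter it builds upon instead of duplicating the event list, so the two cannot drift apart. A sketch, using the names from the hunk above:

    @requires_events(['sched_switch'])
    def df_context_switches(self):
        ...

    # Stays in sync with df_context_switches() if its event list ever changes:
    @requires_events(df_context_switches.required_events)
    def plot_context_switch(self, filepath=None):
        ...

This works because ``df_context_switches`` is already defined, and already wrapped, by the time the class body evaluates the second decorator's argument.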
diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py index 01273e7fc..6028f153b 100644 --- a/lisa/analysis/frequency.py +++ b/lisa/analysis/frequency.py @@ -24,7 +24,7 @@ import os import pandas as pd import pylab as pl -from lisa.analysis.base import AnalysisBase +from lisa.analysis.base import AnalysisBase, requires_events from lisa.utils import memoized from bart.common.Utils import area_under_curve from matplotlib.ticker import FuncFormatter @@ -86,6 +86,7 @@ class FrequencyAnalysis(AnalysisBase): if cpu in domain: return self._get_frequency_residency(tuple(domain)) + @requires_events(['cpu_frequency']) def df_cpu_frequency_transitions(self, cpu): """ Compute number of frequency transitions of a given CPU. @@ -109,6 +110,7 @@ class FrequencyAnalysis(AnalysisBase): return pd.DataFrame(transitions) + @requires_events(df_cpu_frequency_transitions.required_events) def df_cpu_frequency_transition_rate(self, cpu): """ Compute frequency transition rate of a given CPU. @@ -724,6 +726,7 @@ class FrequencyAnalysis(AnalysisBase): ############################################################################### @memoized + @requires_events(['cpu_frequency', 'cpu_idle']) def _get_frequency_residency(self, cpus): """ Get a DataFrame with per cluster frequency residency, i.e. amount of -- GitLab From 4e4c153a3912e4beed10be136089ef3766697e80 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 14 Nov 2018 14:56:09 +0000 Subject: [PATCH 17/56] analysis: Cleanup imports --- lisa/analysis/base.py | 6 ------ lisa/analysis/frequency.py | 9 +++++---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index cd24a58f4..02d937304 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -15,20 +15,14 @@ # limitations under the License. 
# -import logging -from collections import namedtuple import functools import os import inspect -import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt -import pandas as pd -import pylab as pl from trappy.utils import listify -""" Helper module for Analysis classes """ from lisa.utils import Loggable def requires_events(events): diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py index 6028f153b..068324b6a 100644 --- a/lisa/analysis/frequency.py +++ b/lisa/analysis/frequency.py @@ -17,19 +17,20 @@ """ Frequency Analysis Module """ +import os + import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt -import operator -import os import pandas as pd import pylab as pl -from lisa.analysis.base import AnalysisBase, requires_events -from lisa.utils import memoized from bart.common.Utils import area_under_curve from matplotlib.ticker import FuncFormatter from trappy.utils import listify +from lisa.analysis.base import AnalysisBase, requires_events +from lisa.utils import memoized + class FrequencyAnalysis(AnalysisBase): """ Support for plotting Frequency Analysis data -- GitLab From 6c29a7f820f0b3f4a47b41fb0e79c3b4dc9a7a8f Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 15 Nov 2018 12:46:44 +0000 Subject: [PATCH 18/56] analysis/base: Add some color control helpers --- lisa/analysis/base.py | 55 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index 02d937304..7ee24ced2 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -20,11 +20,20 @@ import os import inspect import matplotlib.pyplot as plt +from cycler import cycler from trappy.utils import listify from lisa.utils import Loggable +# Colorblind-friendly cycle, see https://gist.github.com/thriveth/8560036 +COLOR_CYCLES = [ + '#377eb8', '#ff7f00', '#4daf4a', + '#f781bf', '#a65628', '#984ea3', + '#999999', '#e41a1c', '#dede00'] + +plt.rcParams['axes.prop_cycle'] = cycler(color=COLOR_CYCLES) + def requires_events(events): """ Decorator for methods that require some given trace events @@ -96,6 +105,52 @@ class AnalysisBase(Loggable): plt.tight_layout(h_pad=3.5) return figure, axes + @classmethod + def cycle_colors(cls, axis, nr_cycles): + """ + Cycle the axis color cycle ``nr_cycles`` forward + + :param axis: The axis to manipulate + :type axis: matplotlib.axes.Axes + + :param nr_cycles: The number of colors to cycle through. + :type nr_cycles: int + + .. note:: + + This is an absolute cycle, as in, it will always start from the first + color defined in the color cycle. + + """ + if nr_cycles < 1: + return + + colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] + + if nr_cycles > len(colors): + nr_cycles -= len(colors) + + axis.set_prop_cycle(cycler(color=colors[nr_cycles:] + colors[:nr_cycles])) + + @classmethod + def get_next_color(cls, axis): + """ + Get the next color that will be used to draw lines on the axis + + :param axis: The axis + :type axis: matplotlib.axes.Axes + + .. warning:: + + This will consume the color from the cycler, which means it will + change which color is to be used next. + + """ + # XXX: We're accessing some private data here, so that could break eventually + # Need to find another way to get the current color from the cycler, or to + # plot all data from a dataframe in the same color. 
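+        # (matplotlib currently keeps the cycler state in the private
+        # axis._get_lines.prop_cycler attribute, an itertools.cycle, so
+        # this may break with a future matplotlib release.)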
+ return next(axis._get_lines.prop_cycler)['color'] + def save_plot(self, figure, filepath=None, img_format="png"): """ Save the plot stored in the ``figure`` -- GitLab From bc8d9acb3cb0d21fb119d69f4fb0d290ed38fbaa Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 15 Nov 2018 12:50:27 +0000 Subject: [PATCH 19/56] analysis/load_tracking: Use cycle_colors() --- lisa/analysis/load_tracking.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py index fca6e0230..36f9b4893 100644 --- a/lisa/analysis/load_tracking.py +++ b/lisa/analysis/load_tracking.py @@ -89,8 +89,9 @@ class LoadTrackingAnalysis(AnalysisBase): cpus_df = self.df_cpus() - for cpu in cpus: + for idx, cpu in enumerate(cpus): axis = axes[cpu] if len(cpus) > 1 else axes + self.cycle_colors(axis, 2 * idx) # Add CPU utilization axis.set_title('CPU{}'.format(cpu)) -- GitLab From 6d6de12bf2d09b0ba758c9e366f83d6b34d432dd Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 15 Nov 2018 12:50:55 +0000 Subject: [PATCH 20/56] analysis/frequency: plot_cpus(): Plot series instead of dataframe This lets us have nice colors. --- lisa/analysis/cpus.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lisa/analysis/cpus.py b/lisa/analysis/cpus.py index 57b36b7f3..673256465 100644 --- a/lisa/analysis/cpus.py +++ b/lisa/analysis/cpus.py @@ -177,9 +177,8 @@ class CpusAnalysis(AnalysisBase): fig, axis = self.setup_plot(height=8) ctx_sw_df = self.df_context_switches() - ctx_sw_df.plot.bar(title="Per-CPU Task Context Switches", - legend=False, - ax=axis) + ctx_sw_df["context_switch_cnt"].plot.bar( + title="Per-CPU Task Context Switches", legend=False, ax=axis) axis.grid() self.save_plot(fig, filepath) -- GitLab From 45d54193f045b90318445fd94ec53bd119d62dc7 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 15 Nov 2018 12:51:57 +0000 Subject: [PATCH 21/56] analysis: Rework frequency analysis All methods now work on lisa/next, with the exception of plot_peripheral_clock(), which I didn't get to test. On top of all the factorization and cleanups introduced by this commit, the idea was also to get rid of the big.LITTLE dependency. We now exclusively rely on frequency domains reported by cpufreq, rather than hardcoded big.LITTLE names. --- lisa/analysis/frequency.py | 891 +++++++++---------------------------- 1 file changed, 222 insertions(+), 669 deletions(-) diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py index 068324b6a..350b970c3 100644 --- a/lisa/analysis/frequency.py +++ b/lisa/analysis/frequency.py @@ -44,10 +44,65 @@ class FrequencyAnalysis(AnalysisBase): def __init__(self, trace): super(FrequencyAnalysis, self).__init__(trace) -############################################################################### -# DataFrame Getter Methods -############################################################################### + @memoized + @requires_events(['cpu_frequency', 'cpu_idle']) + def _get_frequency_residency(self, cpus): + """ + Get a DataFrame with per cluster frequency residency, i.e. amount of + time spent at a given frequency in each cluster.
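+        ("Cluster" here stands for any group of CPUs sharing a frequency
+        domain, not necessarily a big.LITTLE cluster.)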
+ + :param cpus: A tuple of CPU IDs + :type cpus: tuple(int) + + :returns: A :class:`pandas.DataFrame` with: + + * A ``total_time`` column (the total time spent at a frequency) + * A ``active_time`` column (the non-idle time spent at a frequency) + """ + freq_df = self._trace.df_events('cpu_frequency') + # Assumption: all CPUs in a cluster run at the same frequency, i.e. the + # frequency is scaled per-cluster not per-CPU. Hence, we can limit the + # cluster frequencies data to a single CPU. This assumption is verified + # by the Trace module when parsing the trace. + if len(cpus) > 1 and not self._trace.freq_coherency: + self.get_logger().warning('Cluster frequency is NOT coherent,' + 'cannot compute residency!') + return None + + cluster_freqs = freq_df[freq_df.cpu == cpus[0]] + + # Compute TOTAL Time + cluster_freqs = self._trace.add_events_deltas( + cluster_freqs, col_name="total_time", inplace=False) + time_df = cluster_freqs[["total_time", "frequency"]].groupby(["frequency"]).sum() + + # Compute ACTIVE Time + cluster_active = self._trace.analysis.cpus.signal_cluster_active(cpus) + + # In order to compute the active time spent at each frequency we + # multiply 2 square waves: + # - cluster_active, a square wave of the form: + # cluster_active[t] == 1 if at least one CPU is reported to be + # non-idle by CPUFreq at time t + # cluster_active[t] == 0 otherwise + # - freq_active, square wave of the form: + # freq_active[t] == 1 if at time t the frequency is f + # freq_active[t] == 0 otherwise + available_freqs = sorted(cluster_freqs.frequency.unique()) + cluster_freqs = cluster_freqs.join( + cluster_active.to_frame(name='active'), how='outer') + cluster_freqs.fillna(method='ffill', inplace=True) + nonidle_time = [] + for freq in available_freqs: + freq_active = cluster_freqs.frequency.apply(lambda x: 1 if x == freq else 0) + active_t = cluster_freqs.active * freq_active + # Compute total time by integrating the square wave + nonidle_time.append(self._trace.integrate_square_wave(active_t)) + + time_df["active_time"] = pd.DataFrame(index=available_freqs, data=nonidle_time) + return time_df + @requires_events(_get_frequency_residency.required_events) def df_cpu_frequency_residency(self, cpu): """ Get per-CPU frequency residency, i.e. amount of @@ -60,16 +115,17 @@ class FrequencyAnalysis(AnalysisBase): time is returned :type total: bool - :returns: :mod:`pandas.DataFrame` - "total" or "active" time residency - at each frequency. + :returns: A :class:`pandas.DataFrame` with: - :raises: TypeError + * A ``total_time`` column (the total time spent at a frequency) + * A ``active_time`` column (the non-idle time spent at a frequency) """ if not isinstance(cpu, int): raise TypeError('Input CPU parameter must be an integer') return self._get_frequency_residency((cpu,)) + @requires_events(_get_frequency_residency.required_events) def df_domain_frequency_residency(self, cpu): """ Get per-frequency-domain frequency residency, i.e. amount of time each @@ -78,9 +134,10 @@ class FrequencyAnalysis(AnalysisBase): :param cpu: Any CPU of the domain to analyse :type cpu: int - :returns: :mod:`pandas.DataFrame` - "total" or "active" time residency - at each frequency. 
+ :returns: A :class:`pandas.DataFrame` with: + * A ``total_time`` column (the total time spent at a frequency) + * A ``active_time`` column (the non-idle time spent at a frequency) """ domains = self._trace.plat_info['freq-domains'] for domain in domains: @@ -95,7 +152,9 @@ class FrequencyAnalysis(AnalysisBase): :param cpu: a CPU ID :type cpu: int - :returns: :mod:`pandas.DataFrame` - number of frequency transitions + :returns: A :class:`pandas.DataFrame` with: + + * A ``transitions`` column (the number of frequency transitions) """ freq_df = self._trace.df_events('cpu_frequency') @@ -119,8 +178,9 @@ class FrequencyAnalysis(AnalysisBase): :param cpu: a CPU ID :type cpu: int - :returns: :mod:`pandas.DataFrame - number of frequency transitions per - second + :returns: A :class:`pandas.DataFrame` with: + + * A ``transitions`` column (the number of frequency transitions per second) """ transitions = self.df_cpu_frequency_transitions(cpu) if transitions is None: @@ -130,6 +190,24 @@ class FrequencyAnalysis(AnalysisBase): lambda x: x / (self._trace.x_max - self._trace.x_min) ) + @requires_events(['cpu_frequency']) + def get_average_cpu_frequency(self, cpu): + """ + Get the average frequency for a given CPU + + :param cpu: The CPU to analyse + :type cpu: int + """ + df = self._trace.df_events('cpu_frequency') + df = df[df.cpu == cpu] + + # We can't use the pandas average because it's not weighted by + # time spent in each frequency, so we have to craft our own. + df = self._trace.add_events_deltas(df, inplace=False) + timespan = df.index[-1] - df.index[0] + + return (df['frequency'] * df['delta']).sum() / timespan + ############################################################################### # Plotting Methods ############################################################################### @@ -213,742 +291,217 @@ class FrequencyAnalysis(AnalysisBase): figname = os.path.join(self._trace.plots_dir, '{}{}.png'.format(self._trace.plots_prefix, clk)) pl.savefig(figname, bbox_inches='tight') - def plot_cluster_frequencies(self, title='Clusters Frequencies'): - """ - Plot frequency trend for all clusters. If sched_overutilized events are - available, the plots will also show the intervals of time where the - cluster was overutilized. 
- - :param title: user-defined plot title - :type title: str - """ - if not self._trace.hasEvents('cpu_frequency'): - self.get_logger().warning('Events [cpu_frequency] not found, plot DISABLED!') - return - df = self._trace.df_events('cpu_frequency') - - pd.options.mode.chained_assignment = None - - # Extract LITTLE and big clusters frequencies - # and scale them to [MHz] - if self._little_cpus: - lfreq = df[df.cpu == self._little_cpus[-1]] - lfreq['frequency'] = lfreq['frequency']/1e3 - else: - lfreq = [] - if self._big_cpus: - bfreq = df[df.cpu == self._big_cpus[-1]] - bfreq['frequency'] = bfreq['frequency']/1e3 - else: - bfreq = [] - - # Compute AVG frequency for LITTLE cluster - avg_lfreq = 0 - if len(lfreq) > 0: - lfreq['timestamp'] = lfreq.index - lfreq['delta'] = (lfreq['timestamp'] -lfreq['timestamp'].shift()).fillna(0).shift(-1) - lfreq['cfreq'] = (lfreq['frequency'] * lfreq['delta']).fillna(0) - timespan = lfreq.iloc[-1].timestamp - lfreq.iloc[0].timestamp - avg_lfreq = lfreq['cfreq'].sum()/timespan - - # Compute AVG frequency for big cluster - avg_bfreq = 0 - if len(bfreq) > 0: - bfreq['timestamp'] = bfreq.index - bfreq['delta'] = (bfreq['timestamp'] - bfreq['timestamp'].shift()).fillna(0).shift(-1) - bfreq['cfreq'] = (bfreq['frequency'] * bfreq['delta']).fillna(0) - timespan = bfreq.iloc[-1].timestamp - bfreq.iloc[0].timestamp - avg_bfreq = bfreq['cfreq'].sum()/timespan - - pd.options.mode.chained_assignment = 'warn' - - # Setup a dual cluster plot - fig, pltaxes = plt.subplots(2, 1, figsize=(16, 8)) - plt.suptitle(title, y=.97, fontsize=16, horizontalalignment='center') - - # Plot Cluster frequencies - axes = pltaxes[0] - axes.set_title('big Cluster') - if avg_bfreq > 0: - axes.axhline(avg_bfreq, color='r', linestyle='--', linewidth=2) - axes.set_ylim( - (self._trace.plat_info['freqs']['big'][0] - 100000)/1e3, - (self._trace.plat_info['freqs']['big'][-1] + 100000)/1e3 - ) - if len(bfreq) > 0: - bfreq['frequency'].plot(style=['r-'], ax=axes, - drawstyle='steps-post', alpha=0.4) - else: - self.get_logger().warning('NO big CPUs frequency events to plot') - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.set_ylabel('MHz') - axes.grid(True) - axes.set_xticklabels([]) - axes.set_xlabel('') - self._trace.analysis.status.plot_overutilized(axes) - - axes = pltaxes[1] - axes.set_title('LITTLE Cluster') - if avg_lfreq > 0: - axes.axhline(avg_lfreq, color='b', linestyle='--', linewidth=2) - axes.set_ylim( - (self._trace.plat_info['freqs']['little'][0] - 100000)/1e3, - (self._trace.plat_info['freqs']['little'][-1] + 100000)/1e3 - ) - if len(lfreq) > 0: - lfreq['frequency'].plot(style=['b-'], ax=axes, - drawstyle='steps-post', alpha=0.4) - else: - self.get_logger().warning('NO LITTLE CPUs frequency events to plot') - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.set_ylabel('MHz') - axes.grid(True) - self._trace.analysis.status.plot_overutilized(axes) - - # Save generated plots into datadir - figname = '{}/{}cluster_freqs.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix) - pl.savefig(figname, bbox_inches='tight') - - self.get_logger().info('LITTLE cluster average frequency: %.3f GHz', - avg_lfreq/1e3) - self.get_logger().info('big cluster average frequency: %.3f GHz', - avg_bfreq/1e3) - - return (avg_lfreq/1e3, avg_bfreq/1e3) - def plot_cpu_frequencies(self, cpus=None): + @requires_events(['cpu_frequency']) + def plot_cpu_frequencies(self, cpu, filepath=None, axis=None): """ - Plot frequency for the specified CPUs (or all if not specified). 
- If sched_overutilized events are available, the plots will also show the - intervals of time where the system was overutilized. + Plot frequency for the specified CPU - The generated plots are also saved as PNG images under the folder - specified by the `plots_dir` parameter of :class:`Trace`. + :param cpu: The CPU for which to plot frequencies + :type cpus: int - :param cpus: the list of CPUs to plot, if None it generate a plot - for each available CPU - :type cpus: int or list(int) + :param axis: If specified, the axis to use for plotting + :type axis: matplotlib.axes.Axes - :return: a dictionary of average frequency for each CPU. + If ``sched_overutilized`` events are available, the plots will also + show the intervals of time where the system was overutilized. """ - if not self._trace.hasEvents('cpu_frequency'): - self.get_logger().warning('Events [cpu_frequency] not found, plot DISABLED!') - return df = self._trace.df_events('cpu_frequency') + df = df[df.cpu == cpu] - if cpus is None: - # Generate plots only for available CPUs - cpus = list(range(df.cpu.max()+1)) - else: - # Generate plots only specified CPUs - cpus = listify(cpus) - - chained_assignment = pd.options.mode.chained_assignment - pd.options.mode.chained_assignment = None - - freq = {} - for cpu_id in listify(cpus): - # Extract CPUs' frequencies and scale them to [MHz] - _df = df[df.cpu == cpu_id] - if _df.empty: - self.get_logger().warning('No [cpu_frequency] events for CPU%d, ' - 'plot DISABLED!', cpu_id) - continue - _df['frequency'] = _df.frequency / 1e3 - - # Compute AVG frequency for this CPU - avg_freq = 0 - if len(_df) > 1: - timespan = _df.index[-1] - _df.index[0] - avg_freq = area_under_curve(_df['frequency'], method='rect') / timespan - - # Store DF for plotting - freq[cpu_id] = { - 'df' : _df, - 'avg' : avg_freq, - } - - pd.options.mode.chained_assignment = chained_assignment - - plots_count = len(freq) - if not plots_count: - return - - # Setup CPUs plots - fig, pltaxes = plt.subplots(len(freq), 1, figsize=(16, 4 * plots_count)) - - avg_freqs = {} - for plot_idx, cpu_id in enumerate(freq): - - # CPU frequencies and average value - _df = freq[cpu_id]['df'] - _avg = freq[cpu_id]['avg'] - - # Plot average frequency - try: - axes = pltaxes[plot_idx] - except TypeError: - axes = pltaxes - axes.set_title('CPU{:2d} Frequency'.format(cpu_id)) - axes.axhline(_avg, color='r', linestyle='--', linewidth=2) - - # Set plot limit based on CPU min/max frequencies - if 'clusters' in self._trace.plat_info: - for cluster,cpus in self._trace.plat_info['clusters'].items(): - if cpu_id not in cpus: - continue - freqs = self._trace.plat_info['freqs'][cluster] - break - else: - freqs = df['frequency'].unique() + local_fig = not axis - axes.set_ylim((min(freqs) - 100000) / 1e3, - (max(freqs) + 100000) / 1e3) + if local_fig: + fig, axis = self.setup_plot() - # Plot CPU frequency transitions - _df['frequency'].plot(style=['r-'], ax=axes, - drawstyle='steps-post', alpha=0.4) + frequencies = self._trace.plat_info['freqs'][cpu] - # Plot overutilzied regions (if signal available) - self._trace.analysis.status.plot_overutilized(axes) + avg = self.get_average_cpu_frequency(cpu) + self.get_logger().info( + "Average frequency for CPU{} : {:.3f} GHz".format(cpu, avg/1e6)) - # Finalize plot - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.set_ylabel('MHz') - axes.grid(True) - if plot_idx + 1 < plots_count: - axes.set_xticklabels([]) - axes.set_xlabel('') + df['frequency'].plot( + ax=axis, drawstyle='steps-post') - avg_freqs[cpu_id] 
= _avg/1e3 - self.get_logger().info('CPU%02d average frequency: %.3f GHz', - cpu_id, avg_freqs[cpu_id]) + if avg > 0: + axis.axhline(avg, color='r', linestyle='--', label="average") - # Save generated plots into datadir - figname = '{}/{}cpus_freqs.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix) - pl.savefig(figname, bbox_inches='tight') + axis.set_ylim(frequencies[0] * 0.9, frequencies[-1] * 1.1) + axis.set_xlim(self._trace.x_min, self._trace.x_max) - return avg_freqs + axis.set_ylabel('Frequency (Hz)') + axis.set_xlabel('Time') + axis.set_title('Frequency of CPU{}'.format(cpu)) + axis.grid(True) + axis.legend() - def plot_cpu_frequency_residency(self, cpus=None, pct=False, active=False): - """ - Plot per-CPU frequency residency. big CPUs are plotted first and then - LITTLEs. + self._trace.analysis.status.plot_overutilized(axis) - Requires the following trace events: - - cpu_frequency - - cpu_idle + if local_fig: + self.save_plot(fig, filepath) - :param cpus: list of CPU IDs. By default plot all CPUs - :type cpus: list(int) or int + return axis - :param pct: plot residencies in percentage - :type pct: bool + @requires_events(plot_cpu_frequencies.required_events) + def plot_domain_frequencies(self, filepath=None): + """ + Plot frequency trend for all frequency domains. - :param active: for percentage plot specify whether to plot active or - total time. Default is TOTAL time - :type active: bool + If ``sched_overutilized`` events are available, the plots will also show + the intervals of time where the cluster was overutilized. """ - if not self._trace.hasEvents('cpu_frequency'): - self.get_logger().warning('Events [cpu_frequency] not found, plot DISABLED!') - return - if not self._trace.hasEvents('cpu_idle'): - self.get_logger().warning('Events [cpu_idle] not found, plot DISABLED!') - return + domains = self._trace.plat_info['freq-domains'] - if cpus is None: - # Generate plots only for available CPUs - cpufreq_data = self._trace.df_events('cpu_frequency') - _cpus = list(range(cpufreq_data.cpu.max()+1)) - else: - _cpus = listify(cpus) + fig, axes = self.setup_plot(nrows=len(domains), sharex=True) + for idx, domain in enumerate(domains): + axis = axes[idx] if len(domains) > 1 else axes - # Split between big and LITTLE CPUs ordered from higher to lower ID - _cpus.reverse() - big_cpus = [c for c in _cpus if c in self._big_cpus] - little_cpus = [c for c in _cpus if c in self._little_cpus] - _cpus = big_cpus + little_cpus + self.cycle_colors(axis, idx) + self.plot_cpu_frequencies(domain[0], filepath, axis) - # Precompute active and total time for each CPU - residencies = [] - xmax = 0.0 - for cpu in _cpus: - res = self._get_frequency_residency(cpu) - residencies.append(ResidencyData('CPU{}'.format(cpu), res)) + axis.set_title('Frequencies of CPUS {}'.format(domain)) - max_time = res.total.max().values[0] - if xmax < max_time: - xmax = max_time + self.save_plot(fig, filepath) - self._plot_frequency_residency(residencies, 'cpu', xmax, pct, active) + return axes - def plot_cluster_frequency_residency(self, clusters=None, - pct=False, active=False): + @requires_events(df_cpu_frequency_residency.required_events) + def plot_cpu_frequency_residency(self, cpu, filepath=None, pct=False, axes=None): """ - Plot the frequency residency in a given cluster, i.e. the amount of - time cluster `cluster` spent at frequency `f_i`. By default, both 'big' - and 'LITTLE' clusters data are plotted. + Plot per-CPU frequency residency. 
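+
+        Both the total and the active (non-idle) time spent at each
+        available frequency are plotted, as reported by
+        df_cpu_frequency_residency().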
- Requires the following trace events: - - cpu_frequency - - cpu_idle - - :param clusters: name of the clusters to be plotted (all of them by - default) - :type clusters: str ot list(str) + :param cpu: The CPU to generate the plot for + :type cpu: int - :param pct: plot residencies in percentage + :param pct: Plot residencies in percentage :type pct: bool - :param active: for percentage plot specify whether to plot active or - total time. Default is TOTAL time - :type active: bool + :param axes: If specified, the axes to use for plotting + :type axis: numpy.ndarray(matplotlib.axes.Axes) """ - if not self._trace.hasEvents('cpu_frequency'): - self.get_logger().warning('Events [cpu_frequency] not found, plot DISABLED!') - return - if not self._trace.hasEvents('cpu_idle'): - self.get_logger().warning('Events [cpu_idle] not found, plot DISABLED!') - return - if 'clusters' not in self._trace.plat_info: - self.get_logger().warning('No platform cluster info. Plot DISABLED!') - return - - # Assumption: all CPUs in a cluster run at the same frequency, i.e. the - # frequency is scaled per-cluster not per-CPU. Hence, we can limit the - # cluster frequencies data to a single CPU - if not self._trace.freq_coherency: - self.get_logger().warning('Cluster frequency is not coherent, plot DISABLED!') - return - # Sanitize clusters - if clusters is None: - _clusters = list(self._trace.plat_info['clusters'].keys()) - else: - _clusters = listify(clusters) - - # Precompute active and total time for each cluster - residencies = [] - xmax = 0.0 - for cluster in _clusters: - res = self._get_frequency_residency( - self._trace.plat_info['clusters'][cluster.lower()]) - residencies.append(ResidencyData('{} Cluster'.format(cluster), - res)) + local_fig = axes is None - max_time = res.total.max().values[0] - if xmax < max_time: - xmax = max_time + if local_fig: + fig, axes = self.setup_plot(nrows=2) - self._plot_frequency_residency(residencies, 'cluster', xmax, pct, active) - - def plot_cpu_frequency_transitions(self, cpus=None, pct=False): - """ - Plot frequency transitions count of the specified CPUs (or all if not - specified). + residency_df = self.df_cpu_frequency_residency(cpu) - Requires cpu_frequency events to be available in the trace. 
- - :param cpus: list of CPU IDs (all CPUs by default) - :type clusters: int or list(int) - - :param pct: plot frequency transitions in percentage - :type pct: bool - """ - if not self._trace.hasEvents('cpu_frequency'): - self.get_logger().warn('Events [cpu_frequency] not found, plot DISABLED!') - return - df = self._trace.df_events('cpu_frequency') + total_df = residency_df.total_time + active_df = residency_df.active_time - if cpus is None: - _cpus = list(range(df.cpu.max() + 1)) - else: - _cpus = listify(cpus) + if pct: + total_df = total_df * 100 / total_df.sum() + active_df = active_df * 100 / active_df.sum() - n_plots = len(_cpus) - gs = gridspec.GridSpec(n_plots, 1) - fig = plt.figure() + total_df.plot.barh(ax=axes[0], color=self.get_next_color(axes[0])) + axes[0].set_title("CPU{} total frequency residency".format(cpu)) - # Precompute frequency transitions - transitions = {} - xmax = 0 - for cpu_id in _cpus: - t = self.df_cpu_frequency_transitions(cpu_id) + active_df.plot.barh(ax=axes[1], color=self.get_next_color(axes[0])) + axes[1].set_title("CPU{} active frequency residency".format(cpu)) + for axis in axes: if pct: - tot = t.transitions.sum() - t = t.apply(lambda x: x * 100.0 / tot) + axis.set_xlabel("Time share (%)") + else: + axis.set_xlabel("Time (s)") - transitions[cpu_id] = t - max_cnt = t.transitions.max() - if xmax < max_cnt: xmax = max_cnt + axis.set_ylabel("Frequency (Hz)") + axis.grid(True) - if pct: - yrange = 0.4 * max(6, len(t)) * n_plots - figtype = "_pct" - labeltype = " [%]" - else: - yrange = 3 * n_plots - figtype = "" - labeltype = "" + if local_fig: + self.save_plot(fig, filepath) - for idx, cpu_id in enumerate(_cpus): - t = transitions[cpu_id] + return axes - axes = fig.add_subplot(gs[idx]) - if pct: - t.T.plot.barh(ax=axes, figsize=(16, yrange), - stacked=True, title='CPU{}'.format(cpu_id)) - axes.legend(loc='lower center', ncol=7) - axes.set_xlim(0, 100) - axes.set_yticklabels([]) - else: - t.plot.barh(ax=axes, figsize=(16, yrange), - color='g', legend=False, - title='CPU{}'.format(cpu_id)) - axes.set_xlim(0, xmax*1.05) - axes.grid(True) - axes.set_ylabel('Frequency [MHz]') - - if idx+1 < n_plots: - axes.set_xticklabels([]) - - axes = fig.axes[0] - legend_y = axes.get_ylim()[1] - axes.annotate('OPP Transitions{}'.format(labeltype), - xy=(0, legend_y), xytext=(-50, 25), - textcoords='offset points', fontsize=18) - fig.axes[-1].set_xlabel('Number of transitions{}'.format(labeltype)) - - figname = '{}cpu_freq_transitions{}.png'.format( - self._trace.plots_prefix, figtype) - fig.savefig(os.path.join(self._trace.plots_dir, figname), - bbox_inches='tight') - - def plot_cluster_frequency_transitions(self, clusters=None, pct=False): + @requires_events(plot_cpu_frequency_residency.required_events) + def plot_domain_frequency_residency(self, filepath=None, pct=False): """ - Plot frequency transitions count of the specified clusters (all of them - is not specified). - - Requires cpu_frequency events to be available in the trace. + Plot the frequency residency for all frequency domains. - Notice that we assume that frequency is - scaled at cluster level, therefore we always consider the first CPU of - a cluster for this computation. 
- - :param clusters: name of the clusters to be plotted (all of them by - default) - :type clusters: str or list(str) - - :param pct: plot frequency transitions in percentage + :param pct: Plot residencies in percentage :type pct: bool """ - if not self._trace.hasEvents('cpu_frequency'): - self.get_logger().warn('Events [cpu_frequency] not found, plot DISABLED!') - return - - if not self._trace.plat_info or 'clusters' not in self._trace.plat_info: - self.get_logger().warn('No platform cluster info, plot DISABLED!') - return - - if clusters is None: - _clusters = list(self._trace.plat_info['clusters'].keys()) - else: - _clusters = listify(clusters) - - n_plots = len(_clusters) - gs = gridspec.GridSpec(n_plots, 1) - fig = plt.figure() - - # Precompute frequency transitions - transitions = {} - xmax = 0 - for c in _clusters: - # We assume frequency is scaled at cluster level and we therefore - # pick information from the first CPU in the cluster. - cpu_id = self._trace.plat_info['clusters'][c.lower()][0] - t = self.df_cpu_frequency_transitions(cpu_id) + domains = self._trace.plat_info['freq-domains'] - if pct: - tot = t.transitions.sum() - t = t.apply(lambda x: x * 100.0 / tot) + fig, axes = self.setup_plot(nrows=2*len(domains), sharex=True) + for idx, domain in enumerate(domains): + local_axes = axes[2 * idx : 2 * (idx + 1)] - transitions[c] = t - max_cnt = t.transitions.max() - if xmax < max_cnt: xmax = max_cnt + for ax in local_axes: + self.cycle_colors(ax, 2 * idx) - if pct: - yrange = 0.4 * max(6, len(t)) * n_plots - figtype = "_pct" - labeltype = " [%]" - else: - yrange = 3 * n_plots - figtype = "" - labeltype = "" + self.plot_cpu_frequency_residency(domain[0], filepath, pct, local_axes) + for axis in local_axes: + title = axis.get_title() + axis.set_title(title.replace("CPU{}".format(domain[0]), "CPUs {}".format(domain))) - for idx, c in enumerate(_clusters): - t = transitions[c] - - axes = fig.add_subplot(gs[idx]) - if pct: - t.T.plot.barh(ax=axes, figsize=(16, yrange), - stacked=True, title='{} Cluster'.format(c)) - axes.legend(loc='lower center', ncol=7) - axes.set_xlim(0, 100) - axes.set_yticklabels([]) - else: - t.plot.barh(ax=axes, figsize=(16, yrange), - color='g', legend=False, - title='{} Cluster'.format(c)) - axes.set_xlim(0, xmax*1.05) - axes.grid(True) - axes.set_ylabel('Frequency [MHz]') - - if idx+1 < n_plots: - axes.set_xticklabels([]) - - axes = fig.axes[0] - legend_y = axes.get_ylim()[1] - axes.annotate('OPP Transitions{}'.format(labeltype), - xy=(0, legend_y), xytext=(-50, 25), - textcoords='offset points', fontsize=18) - fig.axes[-1].set_xlabel('Number of transitions{}'.format(labeltype)) - - figname = '{}cluster_freq_transitions{}.png'.format( - self._trace.plots_prefix, figtype) - fig.savefig(os.path.join(self._trace.plots_dir, figname), - bbox_inches='tight') + self.save_plot(fig, filepath) -############################################################################### -# Utility Methods -############################################################################### + return axes - @memoized - @requires_events(['cpu_frequency', 'cpu_idle']) - def _get_frequency_residency(self, cpus): - """ - Get a DataFrame with per cluster frequency residency, i.e. amount of - time spent at a given frequency in each cluster. 
- - :param cpus: A tuple of CPU IDs - :type cpus: tuple(int) - - :returns: namedtuple(ResidencyTime) - tuple of total and active time - dataframes + @requires_events(df_cpu_frequency_transitions.required_events) + def plot_cpu_frequency_transitions(self, cpu, filepath=None, pct=False, axis=None): """ - freq_df = self._trace.df_events('cpu_frequency') - # Assumption: all CPUs in a cluster run at the same frequency, i.e. the - # frequency is scaled per-cluster not per-CPU. Hence, we can limit the - # cluster frequencies data to a single CPU. This assumption is verified - # by the Trace module when parsing the trace. - if len(cpus) > 1 and not self._trace.freq_coherency: - self.get_logger().warning('Cluster frequency is NOT coherent,' - 'cannot compute residency!') - return None - - cluster_freqs = freq_df[freq_df.cpu == cpus[0]] - - # Compute TOTAL Time - cluster_freqs = self._trace.add_events_deltas( - cluster_freqs, col_name="total_time", inplace=False) - time_df = cluster_freqs[["total_time", "frequency"]].groupby(["frequency"]).sum() - - # Compute ACTIVE Time - cluster_active = self._trace.analysis.cpus.signal_cluster_active(cpus) - - # In order to compute the active time spent at each frequency we - # multiply 2 square waves: - # - cluster_active, a square wave of the form: - # cluster_active[t] == 1 if at least one CPU is reported to be - # non-idle by CPUFreq at time t - # cluster_active[t] == 0 otherwise - # - freq_active, square wave of the form: - # freq_active[t] == 1 if at time t the frequency is f - # freq_active[t] == 0 otherwise - available_freqs = sorted(cluster_freqs.frequency.unique()) - cluster_freqs = cluster_freqs.join( - cluster_active.to_frame(name='active'), how='outer') - cluster_freqs.fillna(method='ffill', inplace=True) - nonidle_time = [] - for freq in available_freqs: - freq_active = cluster_freqs.frequency.apply(lambda x: 1 if x == freq else 0) - active_t = cluster_freqs.active * freq_active - # Compute total time by integrating the square wave - nonidle_time.append(self._trace.integrate_square_wave(active_t)) + Plot frequency transitions count of the specified CPU - time_df["active_time"] = pd.DataFrame(index=available_freqs, data=nonidle_time) - return time_df + :param cpu: The CPU to genererate the plot for + :type cpu: int - def _plot_frequency_residency_abs(self, axes, residency, n_plots, - is_first, is_last, xmax, title=''): + :param pct: Plot frequency transitions in percentage + :type pct: bool """ - Private method to generate frequency residency plots. 
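+        # An axis may be handed in by plot_domain_frequency_transitions();
+        # only create (and, further down, save) a standalone figure when we
+        # own the plot.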
+ local_fig = axis is None - :param axes: axes over which to generate the plot - :type axes: matplotlib.axes.Axes + if local_fig: + fig, axis = self.setup_plot() - :param residency: tuple of total and active time dataframes - :type residency: namedtuple(ResidencyTime) + df = self.df_cpu_frequency_transitions(cpu) - :param n_plots: total number of plots - :type n_plots: int - - :param is_first: if True this is the first plot - :type is_first: bool + if pct: + df = df * 100 / df.sum() - :param is_last: if True this is the last plot - :type is_last: bool + df["transitions"].plot.barh(ax=axis, color=self.get_next_color(axis)) - :param xmax: x-axes higher bound - :param xmax: double + axis.set_title('Frequency transitions of CPU{}'.format(cpu)) - :param title: title of this subplot - :type title: str - """ - yrange = 0.4 * max(6, len(residency.total)) * n_plots - residency.total.plot.barh(ax=axes, color='g', - legend=False, figsize=(16, yrange)) - residency.active.plot.barh(ax=axes, color='r', - legend=False, figsize=(16, yrange)) - - axes.set_xlim(0, 1.05*xmax) - axes.set_ylabel('Frequency [MHz]') - axes.set_title(title) - axes.grid(True) - if is_last: - axes.set_xlabel('Time [s]') + if pct: + axis.set_xlabel("Transitions share (%)") else: - axes.set_xticklabels([]) - - if is_first: - # Put title on top of the figure. As of now there is no clean way - # to make the title appear always in the same position in the - # figure because figure heights may vary between different - # platforms (different number of OPPs). Hence, we use annotation - legend_y = axes.get_ylim()[1] - axes.annotate('OPP Residency Time', xy=(0, legend_y), - xytext=(-50, 45), textcoords='offset points', - fontsize=18) - axes.annotate('GREEN: Total', xy=(0, legend_y), - xytext=(-50, 25), textcoords='offset points', - color='g', fontsize=14) - axes.annotate('RED: Active', xy=(0, legend_y), - xytext=(50, 25), textcoords='offset points', - color='r', fontsize=14) - - def _plot_frequency_residency_pct(self, axes, residency_df, label, - n_plots, is_first, is_last, res_type): - """ - Private method to generate PERCENTAGE frequency residency plots. 
+ axis.set_xlabel("Transition count") - :param axes: axes over which to generate the plot - :type axes: matplotlib.axes.Axes + axis.set_ylabel("Frequency (Hz)") + axis.grid(True) - :param residency_df: residency time dataframe - :type residency_df: :mod:`pandas.DataFrame` + if local_fig: + self.save_plot(fig, filepath) - :param label: label to be used for percentage residency dataframe - :type label: str + return axis - :param n_plots: total number of plots - :type n_plots: int - - :param is_first: if True this is the first plot - :type is_first: bool - - :param is_first: if True this is the last plot - :type is_first: bool - - :param res_type: type of residency, either TOTAL or ACTIVE - :type title: str + @requires_events(plot_cpu_frequency_transitions.required_events) + def plot_domain_frequency_transitions(self, filepath=None, pct=False): """ - # Compute sum of the time intervals - duration = residency_df.time.sum() - residency_pct = pd.DataFrame( - {label: residency_df.time.apply(lambda x: x*100/duration)}, - index=residency_df.index - ) - yrange = 3 * n_plots - residency_pct.T.plot.barh(ax=axes, stacked=True, figsize=(16, yrange)) - - axes.legend(loc='lower center', ncol=7) - axes.set_xlim(0, 100) - axes.grid(True) - if is_last: - axes.set_xlabel('Residency [%]') - else: - axes.set_xticklabels([]) - if is_first: - legend_y = axes.get_ylim()[1] - axes.annotate('OPP {} Residency Time'.format(res_type), - xy=(0, legend_y), xytext=(-50, 35), - textcoords='offset points', fontsize=18) - - def _plot_frequency_residency(self, residencies, entity_name, xmax, - pct, active): + Plot frequency transitions count for all frequency domains + + :param pct: Plot frequency transitions in percentage + :type pct: bool """ - Generate Frequency residency plots for the given entities. + domains = self._trace.plat_info['freq-domains'] - :param residencies: list of residencies to be plotted - :type residencies: list(namedtuple(ResidencyData)) - each tuple - contains: - - a label to be used as subplot title - - a namedtuple(ResidencyTime) + fig, axes = self.setup_plot(nrows=len(domains)) - :param entity_name: name of the entity ('cpu' or 'cluster') used in the - figure name - :type entity_name: str + for idx, domain in enumerate(domains): + axis = axes[idx] + self.cycle_colors(axis, idx) - :param xmax: upper bound of x-axes - :type xmax: double + self.plot_cpu_frequency_transitions(domain[0], filepath, pct, axis) - :param pct: plot residencies in percentage - :type pct: bool + title = axis.get_title() + axis.set_title(title.replace("CPU{}".format(domain[0]), "CPUs {}".format(domain))) - :param active: for percentage plot specify whether to plot active or - total time. 
Default is TOTAL time - :type active: bool - """ - n_plots = len(residencies) - gs = gridspec.GridSpec(n_plots, 1) - fig = plt.figure() - - figtype = "" - for idx, data in enumerate(residencies): - if data.residency is None: - plt.close(fig) - return - - axes = fig.add_subplot(gs[idx]) - is_first = idx == 0 - is_last = idx+1 == n_plots - if pct and active: - self._plot_frequency_residency_pct(axes, data.residency.active, - data.label, n_plots, - is_first, is_last, - 'ACTIVE') - figtype = "_pct_active" - continue - if pct: - self._plot_frequency_residency_pct(axes, data.residency.total, - data.label, n_plots, - is_first, is_last, - 'TOTAL') - figtype = "_pct_total" - continue - - self._plot_frequency_residency_abs(axes, data.residency, - n_plots, is_first, - is_last, xmax, - title=data.label) - - figname = '{}/{}{}_freq_residency{}.png'\ - .format(self._trace.plots_dir, - self._trace.plots_prefix, - entity_name, figtype) - pl.savefig(figname, bbox_inches='tight') + self.save_plot(fig, filepath) + + return axes # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 -- GitLab From 438d36281037d6a3e070dcc3d5d8b2ea999b2984 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 15 Nov 2018 12:55:05 +0000 Subject: [PATCH 22/56] analysis: Global docstring cleanup --- lisa/analysis/base.py | 9 +++++---- lisa/analysis/cpus.py | 37 +++++++++++++------------------------ 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index 7ee24ced2..3400ac606 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -70,7 +70,7 @@ class AnalysisBase(Loggable): :meth:`lisa.analysis.base.requires_events` Plotting methods *must* return the :class:`matplotlib.axes.Axes` instance - used by the plotting method. This lets users embed plots into subplots. + used by the plotting method. This lets users further modify them. """ def __init__(self, trace): @@ -93,10 +93,11 @@ class AnalysisBase(Loggable): :param nrows: Number of plots in a single column :type nrows: int - :Keywords arguments: Extra arguments to pass to :meth:`matplotlib.subplots` + :Keywords arguments: Extra arguments to pass to + :obj:`matplotlib.pyplot.subplots` :returns: tuple(matplotlib.figure.Figure, matplotlib.axes.Axes (or an - (array of, if ``nrows`` > 1)) + array of, if ``nrows`` > 1)) """ figure, axes = plt.subplots( ncols=ncols, nrows=nrows, figsize=(width, height * nrows), **kwargs @@ -156,7 +157,7 @@ class AnalysisBase(Loggable): Save the plot stored in the ``figure`` :param figure: The plot figure - :type figure: matplotlib.figure.figure + :type figure: matplotlib.figure.Figure :param filepath: The path of the file into which the plot will be saved. If ``None``, a path based on the trace directory and the calling method diff --git a/lisa/analysis/cpus.py b/lisa/analysis/cpus.py index 673256465..e3cd6dc4c 100644 --- a/lisa/analysis/cpus.py +++ b/lisa/analysis/cpus.py @@ -30,10 +30,7 @@ from lisa.analysis.base import AnalysisBase, requires_events class CpusAnalysis(AnalysisBase): """ - Support for CPUs Signals Analysis - - :param trace: input Trace object - :type trace: :class:`Trace` + Support for CPUs signals analysis """ name = 'cpus' @@ -51,7 +48,9 @@ class CpusAnalysis(AnalysisBase): """ Compute number of context switches on each CPU. 
-        :returns: :mod:`pandas.DataFrame`
+        :returns: A :class:`pandas.DataFrame` with:
+
+          * A ``context_switch_cnt`` column (the number of context switches per CPU)
         """
         sched_df = self._trace.df_events('sched_switch')
         cpus = list(range(self._trace.cpus_count))
@@ -72,9 +71,9 @@ class CpusAnalysis(AnalysisBase):
         :param cpus: List of CPUs to find wakeups for. If None, all CPUs.
         :type cpus: list(int) or None

-        :returns: :mod:`pandas.DataFrame` with one column ``cpu``, where each
-            row shows a time when the given ``cpu`` was woken up from
-            idle.
+        :returns: A :class:`pandas.DataFrame` with:
+
+          * A ``cpu`` column (the CPU that woke up at the row index)
         """
         cpus = cpus or list(range(self._trace.cpus_count))

@@ -91,18 +90,13 @@ class CpusAnalysis(AnalysisBase):
     @requires_events(['cpu_idle'])
     def signal_cpu_active(self, cpu):
         """
-        Build a square wave representing the active (i.e. non-idle) CPU time,
-        i.e.:
-
-          cpu_active[t] == 1 if the CPU is reported to be non-idle by cpuidle at
-          time t
-          cpu_active[t] == 0 otherwise
+        Build a square wave representing the active (i.e. non-idle) CPU time

         :param cpu: CPU ID
         :type cpu: int

-        :returns: A :class:`pandas.Series` or ``None`` if the trace contains no
-            "cpu_idle" events
+        :returns: A :class:`pandas.Series` that equals 1 at timestamps where the
+            CPU is reported to be non-idle, 0 otherwise
         """
         idle_df = self._trace.df_events('cpu_idle')
         cpu_df = idle_df[idle_df.cpu_id == cpu]
@@ -127,18 +121,13 @@ class CpusAnalysis(AnalysisBase):
     @requires_events(signal_cpu_active.required_events)
     def signal_cluster_active(self, cluster):
         """
-        Build a square wave representing the active (i.e. non-idle) cluster
-        time, i.e.:
-
-          cluster_active[t] == 1 if at least one CPU is reported to be non-idle
-          by CPUFreq at time t
-          cluster_active[t] == 0 otherwise
+        Build a square wave representing the active (i.e. non-idle) cluster time

         :param cluster: list of CPU IDs belonging to a cluster
         :type cluster: list(int)

-        :returns: A :class:`pandas.Series` or ``None`` if the trace contains no
-            "cpu_idle" events
+        :returns: A :class:`pandas.Series` that equals 1 at timestamps where at
+            least one CPU is reported to be non-idle, 0 otherwise
         """
         active = self.signal_cpu_active(cluster[0]).to_frame(name=cluster[0])
         for cpu in cluster[1:]:
-- 
GitLab


From 29102b1a4bcf8b822b4d5d28086c694906d23f6c Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 16 Nov 2018 20:05:09 +0000
Subject: [PATCH 23/56] trace: un-camelCase some getters

---
 lisa/trace.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lisa/trace.py b/lisa/trace.py
index 7c818e2c7..1dd4cd626 100644
--- a/lisa/trace.py
+++ b/lisa/trace.py
@@ -333,7 +333,7 @@ class Trace(Loggable):
                        name_key : 'TaskName'})
                 .set_index('PID').sort_index())

-    def getTaskByName(self, name):
+    def get_task_by_name(self, name):
         """
         Get the PIDs of all tasks with the specified name.

@@ -354,7 +354,7 @@ class Trace(Loggable):
         return (self._tasks_by_pid[self._tasks_by_pid.TaskName == name]
                 .index.tolist())

-    def getTaskByPid(self, pid):
+    def get_task_by_pid(self, pid):
         """
         Get the name of the task with the specified PID. 
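
A minimal usage sketch of the getters renamed above (illustration only, not
part of the patch set), assuming a parsed ``Trace`` instance ``trace``; the
task name below is hypothetical:

    pids = trace.get_task_by_name("rt-app")   # every PID that carried this name
    name = trace.get_task_by_pid(pids[0])     # the name recorded for that PID
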
-- GitLab From 500ac76c5de1f8ec1e64fad256cc37cc63d4148d Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 16 Nov 2018 20:05:38 +0000 Subject: [PATCH 24/56] trace: Add a (pid or name) to pid helper --- lisa/trace.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/lisa/trace.py b/lisa/trace.py index 1dd4cd626..9c82786fc 100644 --- a/lisa/trace.py +++ b/lisa/trace.py @@ -377,7 +377,27 @@ class Trace(Loggable): except KeyError: return None - def getTasks(self): + def get_task_pid(self, task): + """ + Helper that takes either a name or a PID and always returns a PID + + :param task: Either the task name or the task PID + :type task: int or str + """ + if isinstance(task, str): + pid_list = self.get_task_by_name(task) + if len(pid_list) > 1: + self.get_logger().warning( + "More than one PID found for task {}, " + "using the first one ({})".format(task, pid_list[0])) + pid = pid_list[0] + else: + pid = task + + return pid + + + def get_tasks(self): """ Get a dictionary of all the tasks in the Trace. -- GitLab From a3a3f36d7017c8f3e95e9c72d421a00843a1df46 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 20 Nov 2018 16:06:02 +0000 Subject: [PATCH 25/56] analysis/frequency: Remove stray parameter docstring --- lisa/analysis/frequency.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py index 350b970c3..1f8793e00 100644 --- a/lisa/analysis/frequency.py +++ b/lisa/analysis/frequency.py @@ -111,10 +111,6 @@ class FrequencyAnalysis(AnalysisBase): :param cpu: CPU ID :type cpu: int - :param total: if true returns the "total" time, otherwise the "active" - time is returned - :type total: bool - :returns: A :class:`pandas.DataFrame` with: * A ``total_time`` column (the total time spent at a frequency) -- GitLab From 3c7f4a4a0716d221a1fb53bf9829a0c286ab0369 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 20 Nov 2018 16:11:52 +0000 Subject: [PATCH 26/56] trace: Fix references to obsolete plat_info keys --- lisa/trace.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lisa/trace.py b/lisa/trace.py index 9c82786fc..306e1b610 100644 --- a/lisa/trace.py +++ b/lisa/trace.py @@ -677,7 +677,7 @@ class Trace(Loggable): """ logger = self.get_logger() if not self.hasEvents('cpu_frequency_devlib') \ - or 'clusters' not in self.plat_info: + or 'freq-domains' not in self.plat_info: return devlib_freq = self.df_events('cpu_frequency_devlib') @@ -685,7 +685,7 @@ class Trace(Loggable): devlib_freq.rename(columns={'state':'frequency'}, inplace=True) df = self.df_events('cpu_frequency') - clusters = self.plat_info['clusters'] + domains = self.plat_info['freq-domains'] # devlib always introduces fake cpu_frequency events, in case the # OS has not generated cpu_frequency envets there are the only @@ -710,29 +710,29 @@ class Trace(Loggable): # Inject "initial" devlib frequencies os_df = df dl_df = devlib_freq.iloc[:self.cpus_count] - for _,c in self.plat_info['clusters'].items(): - dl_freqs = dl_df[dl_df.cpu.isin(c)] - os_freqs = os_df[os_df.cpu.isin(c)] - logger.debug("First freqs for %s:\n%s", c, dl_freqs) + for cpus in domains: + dl_freqs = dl_df[dl_df.cpu.isin(cpus)] + os_freqs = os_df[os_df.cpu.isin(cpus)] + logger.debug("First freqs for %s:\n%s", cpus, dl_freqs) # All devlib events "before" os-generated events logger.debug("Min os freq @: %s", os_freqs.index.min()) if os_freqs.empty or \ os_freqs.index.min() > dl_freqs.index.max(): - 
logger.debug("Insert devlib freqs for %s", c) + logger.debug("Insert devlib freqs for %s", cpus) df = pd.concat([dl_freqs, df]) # Inject "final" devlib frequencies os_df = df dl_df = devlib_freq.iloc[self.cpus_count:] - for _,c in self.plat_info['clusters'].items(): - dl_freqs = dl_df[dl_df.cpu.isin(c)] - os_freqs = os_df[os_df.cpu.isin(c)] - logger.debug("Last freqs for %s:\n%s", c, dl_freqs) + for cpus in domains: + dl_freqs = dl_df[dl_df.cpu.isin(cpus)] + os_freqs = os_df[os_df.cpu.isin(cpus)] + logger.debug("Last freqs for %s:\n%s", cpus, dl_freqs) # All devlib events "after" os-generated events logger.debug("Max os freq @: %s", os_freqs.index.max()) if os_freqs.empty or \ os_freqs.index.max() < dl_freqs.index.min(): - logger.debug("Append devlib freqs for %s", c) + logger.debug("Append devlib freqs for %s", cpus) df = pd.concat([df, dl_freqs]) df.sort_index(inplace=True) @@ -740,7 +740,7 @@ class Trace(Loggable): setattr(self.ftrace.cpu_frequency, 'data_frame', df) # Frequency Coherency Check - for _, cpus in clusters.items(): + for cpus in domains: cluster_df = df[df.cpu.isin(cpus)] for chunk in self._chunker(cluster_df, len(cpus)): f = chunk.iloc[0].frequency -- GitLab From bf70574eb5103a4d8fd054c20905c255fd42831e Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 20 Nov 2018 16:27:44 +0000 Subject: [PATCH 27/56] analysis/cpus: Add a plot_orig_capacity() method --- lisa/analysis/cpus.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lisa/analysis/cpus.py b/lisa/analysis/cpus.py index e3cd6dc4c..0207fca0e 100644 --- a/lisa/analysis/cpus.py +++ b/lisa/analysis/cpus.py @@ -173,4 +173,19 @@ class CpusAnalysis(AnalysisBase): self.save_plot(fig, filepath) return axis + def plot_orig_capacity(self, axis, cpu): + """ + Plot the orig capacity of a CPU onto a given axis + + :param axis: The axis + :type axis: matplotlib.axes.Axes + + :param cpu: The CPU + :type cpu: int + """ + if "cpu-capacities" in self._trace.plat_info: + axis.axhline(self._trace.plat_info["cpu-capacities"][cpu], + color=self.get_next_color(axis), + linestyle='--', label="orig_capacity") + # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 -- GitLab From aa4f26ebb2133d49511a28cb5f90f33b367f0a69 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 20 Nov 2018 16:29:10 +0000 Subject: [PATCH 28/56] analysis: Rework tasks analysis This also shuffles some task-related analysis code to/from latency and load_tracking. --- lisa/analysis/latency.py | 221 +------ lisa/analysis/load_tracking.py | 283 +++++++-- lisa/analysis/tasks.py | 1009 +++++++++++--------------------- 3 files changed, 596 insertions(+), 917 deletions(-) diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py index 7032b8111..240e7d1e3 100644 --- a/lisa/analysis/latency.py +++ b/lisa/analysis/latency.py @@ -53,114 +53,6 @@ class LatencyAnalysis(AnalysisBase): # DataFrame Getter Methods ############################################################################### - @memoized - def df_latency(self, task): - """ - DataFrame of task's wakeup/suspend events - - The returned DataFrame index is the time, in seconds, an event related - to `task` happened. - The DataFrame has these columns: - - target_cpu: the CPU where the task has been scheduled - reported only for wakeup events - - curr_state: the current task state: - A letter which corresponds to the standard events reported by the - prev_state field of a sched_switch event. - Only exception is 'A', which is used to represent active tasks, - i.e. 
tasks RUNNING on a CPU - - next_state: the next status for the task - - t_start: the time when the current status started, it matches Time - - t_delta: the interval of time after witch the task will switch to the - next_state - - :param task: the task to report wakeup latencies for - :type task: int or str - """ - - if not self._trace.hasEvents('sched_wakeup'): - self.get_logger().warning('Events [sched_wakeup] not found, ' - 'cannot compute CPU active signal!') - return None - if not self._trace.hasEvents('sched_switch'): - self.get_logger().warning('Events [sched_switch] not found, ' - 'cannot compute CPU active signal!') - return None - - # Get task data - td = self._get_task_data(task) - if not td: - return None - - wk_df = self._trace.df_events('sched_wakeup') - sw_df = self._trace.df_events('sched_switch') - - # Filter Task's WAKEUP events - task_wakeup = wk_df[wk_df.pid == td.pid][['target_cpu', 'pid']] - - # Filter Task's START events - task_events = (sw_df.prev_pid == td.pid) | (sw_df.next_pid == td.pid) - task_switches_df = sw_df[task_events]\ - [['__cpu', 'prev_pid', 'next_pid', 'prev_state']] - - # Unset prev_state for switch_in events, i.e. - # we don't care about the status of a task we are replacing - task_switches_df.prev_state = task_switches_df.apply( - lambda r : np.nan if r['prev_pid'] != td.pid - else self._task_state(r['prev_state']), - axis=1) - - # Rename prev_state - task_switches_df.rename(columns={'prev_state' : 'curr_state'}, inplace=True) - - # Fill in Running status - # We've just set curr_state (a.k.a prev_state) to nan where td.pid was - # switching in, so set the state to 'A' ("active") in those places. - task_switches_df.curr_state = task_switches_df.curr_state.fillna(value='A') - - # Join Wakeup and SchedSwitch events - task_latency_df = task_wakeup.join(task_switches_df, how='outer', - lsuffix='_wkp', rsuffix='_slp') - # Remove not required columns - task_latency_df = task_latency_df[['target_cpu', '__cpu', 'curr_state']] - # Set Wakeup state on each Wakeup event - task_latency_df.curr_state = task_latency_df.curr_state.fillna(value='W') - - # Sanity check for all task states to be mapped to a char - numbers = 0 - for value in task_switches_df.curr_state.unique(): - if type(value) is not str: - self.get_logger().warning('The [sched_switch] events contain "prev_state" value [%s]', - value) - numbers += 1 - if numbers: - verb = 'is' if numbers == 1 else 'are' - self.get_logger().warning(' which %s not currently mapped into a task state.', - verb) - self.get_logger().warning('Check mappings in:') - self.get_logger().warning(' %s::%s _task_state()', - __file__, self.__class__.__name__) - - # Forward annotate task state - task_latency_df['next_state'] = task_latency_df.curr_state.shift(-1) - - # Forward account for previous state duration - task_latency_df['t_start'] = task_latency_df.index - task_latency_df['t_delta'] = ( - task_latency_df['t_start'].shift(-1) - - task_latency_df['t_start'] - ) - - # Fix the last entry, which will have a NaN state duration - # Set duration to trace_end - last_event - task_latency_df.loc[task_latency_df.index[-1], 't_delta'] = ( - self._trace.start_time + - self._trace.time_range - - task_latency_df.index[-1] - ) - - return task_latency_df - - # Select Wakeup latency def df_latency_wakeup(self, task): """ @@ -290,42 +182,6 @@ class LatencyAnalysis(AnalysisBase): run_df = run_df[run_df.next_state.isin(['S', 'x'])][['running_time']] return run_df - @memoized - def df_task_residency(self, task): - """ - DataFrame of a task's 
execution time on each CPU - - The returned DataFrame index is the CPU indexes - The DataFrame has just one column: - - runtime: the time the task spent being active on a given CPU, - in seconds. - - :param task: the task to report runtimes for - :type task: int or str - """ - cpus = list(range(self._trace.plat_info['cpus-count'])) - runtimes = {cpu : 0.0 for cpu in cpus} - - df = self.df_latency(task) - - # Exclude sleep time - df = df[df.curr_state != 'S'] - - for time, data in df.iterrows(): - cpu = data['__cpu'] - - # When waking up, '__cpu' is NaN but 'target_cpu' is populated instead - if np.isnan(cpu): - if data['curr_state'] == 'W': - cpu = data['target_cpu'] - else: - raise RuntimeError('No CPU data for latency_df @{}'.format(time)) - - runtimes[cpu] += data['t_delta'] - - data = [(cpu, time) for cpu, time in runtimes.items()] - return pd.DataFrame(data, columns=['CPU', 'runtime']).set_index('CPU') - @memoized def _get_latency_df(self, task, kind='all', threshold_ms=1): """ @@ -839,29 +695,6 @@ class LatencyAnalysis(AnalysisBase): return stats_df.append(pd.DataFrame( list(stats.values()), columns=['running_time'], index=list(stats.keys()))) - def plot_task_residency(self, task): - """ - Plot CPU residency of the specified task - This will show an overview of how much time that task spent being - active on each available CPU, in seconds. - - :param task: the task to report runtimes for - :type task: int or str - """ - df = self.df_task_residency(task) - - ax = df.plot(kind='bar', figsize=(16, 6)) - ax.set_title('CPU residency of task {}'.format(task)) - - figname = os.path.join( - self._trace.plots_dir, - '{}task_cpu_residency_{}.png'.format( - self._trace.plots_prefix, task - ) - ) - - pl.savefig(figname, bbox_inches='tight') - ############################################################################### # Utility Methods ############################################################################### @@ -871,7 +704,7 @@ class LatencyAnalysis(AnalysisBase): # Get task PID if isinstance(task, str): - task_pids = self._trace.getTaskByName(task) + task_pids = self._trace.get_task_by_name(task) if len(task_pids) == 0: self.get_logger().warning('No tasks found with name [%s]', task) return None @@ -881,15 +714,15 @@ class LatencyAnalysis(AnalysisBase): self.get_logger().warning('Multiple PIDs for task named [%s]', task) for pid in task_pids: self.get_logger().warning(' %5d : %s', pid, - ','.join(self._trace.getTaskByPid(pid))) + ','.join(self._trace.get_task_by_pid(pid))) self.get_logger().warning('Returning stats only for PID: %d', task_pid) - task_name = self._trace.getTaskByPid(task_pid) + task_name = self._trace.get_task_by_pid(task_pid) # Get task name elif isinstance(task, int): task_pid = task - task_name = self._trace.getTaskByPid(task_pid) + task_name = self._trace.get_task_by_pid(task_pid) if task_name is None: self.get_logger().warning('No tasks found with name [%s]', task) return None @@ -900,52 +733,6 @@ class LatencyAnalysis(AnalysisBase): task_label = "{}: {}".format(task_pid, task_name) return TaskData(task_pid, task_name, task_label) - @memoized - def _task_state(self, state): - try: - state = int(state) - except ValueError: - # State already converted to symbol - return state - - # Tasks STATE flags (Linux 3.18) - TASK_STATES = { - 0: "R", # TASK_RUNNING - 1: "S", # TASK_INTERRUPTIBLE - 2: "D", # TASK_UNINTERRUPTIBLE - 4: "T", # __TASK_STOPPED - 8: "t", # __TASK_TRACED - 16: "X", # EXIT_DEAD - 32: "Z", # EXIT_ZOMBIE - 64: "x", # TASK_DEAD - 128: "K", # TASK_WAKEKILL 
-            256: "W", # TASK_WAKING
-            512: "P", # TASK_PARKED
-           1024: "N", # TASK_NOLOAD
-        }
-        try:
-            kernel_version = self._trace.plat_info['kernel-version']
-        except KeyError:
-            self.get_logger().info('Parsing task states assuming 3.18 kernel')
-            kernel_version = KernelVersion('3.18')
-
-        if kernel_version.parts >= (4, 8):
-            TASK_STATES[2048] = "n" # TASK_NEW
-        TASK_MAX_STATE = 2 * max(TASK_STATES)
-
-        res = "R"
-        if state & (TASK_MAX_STATE - 1) != 0:
-            res = ""
-            for key in list(TASK_STATES.keys()):
-                if key & state:
-                    res += TASK_STATES[key]
-            if state & TASK_MAX_STATE:
-                res += "+"
-        else:
-            res = '|'.join(res)
-        return res
-
-
     def _get_cdf(self, data, threshold):
         """
         Build the "Cumulative Distribution Function" (CDF) for the given data
diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py
index 36f9b4893..cd7062dde 100644
--- a/lisa/analysis/load_tracking.py
+++ b/lisa/analysis/load_tracking.py
@@ -37,49 +37,143 @@ class LoadTrackingAnalysis(AnalysisBase):
     def __init__(self, trace):
         super().__init__(trace)

-    def df_cpus(self):
+    @classmethod
+    def _columns_renaming(cls, event):
         """
-        A DataFrame containing per-CPU load tracking signals
+        Columns to rename to unify dataframes between trace event versions
         """
-        try:
-            event = 'sched_load_cfs_rq'
-            self.check_events([event])
-        except RuntimeError:
-            event = 'sched_load_avg_cpu'
-            self.check_events([event])
+        if event in ['sched_load_avg_cpu', 'sched_load_avg_task']:
+            return {
+                "util_avg" : "util",
+                "load_avg" : "load"
+            }

+        return {}
+
+    @classmethod
+    def _columns_to_drop(cls, event):
+        """
+        The extra columns not shared between trace event versions
+        """
+        if event in ['sched_load_cfs_rq', 'sched_load_se']:
+            return ['path', 'rbl_load', 'cpu']
+
+        return []
+
+    def _df_uniformized_signal(self, event):
         df = self._trace.df_events(event)
-        if event == 'sched_cfs_rq':
-            df = df[df.path == '/']
-            df.drop('rbl_load')
-        else:
-            pass
+
+        df = df.rename(columns=self._columns_renaming(event))
+
+        if event == 'sched_load_se':
+            df = df[df.path == "(null)"]
+
+        if event == 'sched_load_cfs_rq':
+            df = df[df.path == "/"]
+
+        to_drop = self._columns_to_drop(event)
+        if to_drop:
+            df = df.drop(to_drop, axis=1)

         return df

-    def df_tasks(self):
+
+    def _df_either_event(self, events):
+        for event in events:
+            if event not in self._trace.events:
+                continue
+
+            return self._df_uniformized_signal(event)
+
+        raise RuntimeError("Trace is missing all of these events: {}".format(events))
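
# (Illustration only, not part of the patch: a minimal sketch of how the
#  fallback helper above resolves an event, assuming `analysis` is a
#  LoadTrackingAnalysis built on a parsed trace. The first listed event
#  present in the trace wins:)
#
#     df = analysis._df_either_event(['sched_load_cfs_rq', 'sched_load_avg_cpu'])
#     # -> unified 'util' / 'load' columns, whichever event generation was traced
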
+
+    def df_cpus_signals(self):
         """
-        A DataFrame containing per-task load tracking signals
+        Get the load-tracking signals for the CPUs
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * A ``util`` column (the average utilization of a CPU at time t)
+          * A ``load`` column (the average load of a CPU at time t)
+
+        :Required events:
+            Either of:
+
+            * ``sched_load_cfs_rq``
+            * ``sched_load_avg_cpu``
         """
-        try:
-            event = 'sched_load_se'
-            self.check_events([event])
-        except RuntimeError:
-            event = 'sched_load_avg_task'
-            self.check_events([event])
+        return self._df_either_event(['sched_load_cfs_rq', 'sched_load_avg_cpu'])

-        df = self._trace.df_events(event)
-        if event == 'sched_load_se':
-            df = df[df.path == '(null)']
-        else:
-            pass
+    def df_tasks_signals(self):
+        """
+        Get the load-tracking signals for the tasks
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * A ``util`` column (the average utilization of a task at time t)
+          * A ``load`` column (the average load of a task at time t)
+
+        If CPU capacity information is available:
+
+        * A ``required_capacity`` column (the minimum available CPU capacity
+          required to run this task without being CPU-bound)
+
+        :Required events:
+            Either of:
+
+            * ``sched_load_se``
+            * ``sched_load_avg_task``
+        """
+        df = self._df_either_event(['sched_load_se', 'sched_load_avg_task'])
+
+        if "cpu-capacities" in self._trace.plat_info:
+            # Add a column which represents the max capacity of the smallest
+            # CPU which can accommodate the task utilization
+            capacities = sorted(self._trace.plat_info["cpu-capacities"].values())
+
+            def fits_capacity(util):
+                for capacity in capacities:
+                    if util <= capacity:
+                        return capacity
+
+                return capacities[-1]
+
+            df["required_capacity"] = df.util.map(fits_capacity)

         return df

+    def df_top_big_tasks(self, util_threshold, min_samples=100):
+        """
+        Tasks which had 'utilization' samples bigger than the specified
+        threshold
+
+        :param util_threshold: the utilization threshold above which samples
+            are counted
+        :type util_threshold: int

+        :param min_samples: minimum number of samples above ``util_threshold``
+        :type min_samples: int
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * Task PIDs as index
+          * A ``samples`` column (The number of util samples above the threshold)
+        """
+        df = self.df_tasks_signals()
+
+        # Compute number of samples above threshold
+        samples = df[df.util > util_threshold].groupby('pid').count()["util"]
+        samples = samples[samples > min_samples]
+        samples = samples.sort_values(ascending=False)
+
+        top_df = pd.DataFrame(samples).rename(columns={"util" : "samples"})
+        top_df["comm"] = top_df.index.map(self._trace.get_task_by_pid)
+
+        return top_df
+
+    def plot_cpus_signals(self, cpus=None, filepath=None):
+        """
+        Plot the CPU-related load-tracking signals

-    def plot_cpus(self, filepath=None, cpus=None):
-        """
-        Plot CPU-related signals
         :param cpus: list of CPUs to be plotted
         :type cpus: list(int)
         """
         cpus = cpus or list(range(self._trace.cpus_count))
         fig, axes = self.setup_plot(nrows=len(cpus), sharex=True)

-        cpus_df = self.df_cpus()
+        cpus_df = self.df_cpus_signals()

         for idx, cpu in enumerate(cpus):
             axis = axes[cpu] if len(cpus) > 1 else axes

             # Add CPU utilization
             axis.set_title('CPU{}'.format(cpu))
-            df = cpus_df[cpus_df.cpu == cpu]
+            df = cpus_df[cpus_df["__cpu"] == cpu]
+
+            df[['util']].plot(ax=axis, drawstyle='steps-post', alpha=0.4)
+            df[['load']].plot(ax=axis, drawstyle='steps-post', alpha=0.4)

-            if len(df):
-                df[['util']].plot(ax=axis, drawstyle='steps-post', alpha=0.4)
-                df[['load']].plot(ax=axis, drawstyle='steps-post', alpha=0.4)
+            self._trace.analysis.cpus.plot_orig_capacity(axis, cpu)

             # Add capacities data if available
             if self._trace.hasEvents('cpu_capacity'):
                 df = self._trace.df_events('cpu_capacity')
-                df = df[df.cpu == cpu]
+                df = df[df["__cpu"] == cpu]
                 if len(df):
                     data = df[['capacity', 'tip_capacity']]
                     data.plot(ax=axis, style=['m', '--y'],

                 axis.set_ylim(0, 1100)

         axis.set_xlim(self._trace.x_min, self._trace.x_max)
+        axis.legend()

         self.save_plot(fig, filepath)

         return axes
+
+    def plot_task_signals(self, task, filepath=None):
+        """
+        Plot the task-related load-tracking signals
+
+        :param task: The name or PID of the task
+        :type task: str or int
+        """
+        fig, axis = self.setup_plot()
+
+        if isinstance(task, str):
+            pid_list = self._trace.get_task_by_name(task)
+            if len(pid_list) > 1:
+                self.get_logger().warning(
+                    "More than one PID found for task {}, 
using the first one".format(task)) + pid = pid_list[0] + else: + pid = task + + df = self.df_tasks_signals() + df = df[df.pid == pid] + + df[['util']].plot(ax=axis, drawstyle='steps-post', alpha=0.4) + df[['load']].plot(ax=axis, drawstyle='steps-post', alpha=0.4) + + axis.set_title('Load-tracking signals of task "{}"'.format(task)) + axis.legend() + axis.grid(True) + + self.save_plot(fig, filepath) + return axis + + def plot_task_required_capacity(self, task, filepath=None, axis=None): + """ + Plot the minimum required capacity of a task + + :param task: The name or PID of the task + :type task: str or int + + :param axis: If provided, overlay the required capacity on this axis + :type axis: matplotlib.axes.Axes + """ + local_fig = axis is None + + if local_fig: + fig, axis = self.setup_plot(height=8) + + pid = self._trace.get_task_pid(task) + + df = self.df_tasks_signals() + df = df[df.pid == pid] + + # Build task names (there could be multiple, during the task lifetime) + task_name = 'Task ({}:{})'.format(pid, self._trace.get_task_by_pid(pid)) + + df["required_capacity"].plot( + drawstyle='steps-post', + ax=axis) + + axis.legend() + axis.grid(True) + + if local_fig: + axis.set_title(task_name) + axis.set_ylim(0, 1100) + axis.set_xlim(self._trace.x_min, self._trace.x_max) + axis.set_ylabel('Utilization') + axis.set_xlabel('Time (s)') + + self.save_plot(fig, filepath) + + return axis + + def plot_task_placement(self, task, filepath=None): + """ + Plot the CPU placement of the task + + :param task: The name or PID of the task + :type task: str or int + """ + fig, axis = self.setup_plot() + + # Get all utilization update events + df = self.df_tasks_signals() + + pid = self._trace.get_task_pid(task) + df = df[df.pid == pid] + + cpu_capacities = self._trace.plat_info["cpu-capacities"] + + def evaluate_placement(cpu, required_capacity): + capacity = cpu_capacities[cpu] + + if capacity < required_capacity: + return "CPU capacity < required capacity" + elif capacity == required_capacity: + return "CPU capacity == required capacity" + else: + return "CPU capacity > required capacity" + + df["placement"] = df.apply( + lambda row: evaluate_placement( + row["__cpu"], + row["required_capacity"]), axis=1) + + for stat in df["placement"].unique(): + df[df.placement == stat]["__cpu"].plot(ax=axis, style="+", label=stat) + + axis.set_title("Utilization vs placement of task \"{}\"".format(task)) + + axis.set_xlim(self._trace.x_min, self._trace.x_max) + axis.grid(True) + axis.legend() + + self._trace.analysis.status.plot_overutilized(axis) + + self.save_plot(fig, filepath) + + return axis diff --git a/lisa/analysis/tasks.py b/lisa/analysis/tasks.py index 1efd3d0a9..3e40eb12e 100644 --- a/lisa/analysis/tasks.py +++ b/lisa/analysis/tasks.py @@ -15,19 +15,98 @@ # limitations under the License. 
 #
-""" Tasks Analysis Module """
+from enum import Enum

-import matplotlib.gridspec as gridspec
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import pylab as pl
-import re

-from lisa.analysis.base import AnalysisBase
+from lisa.analysis.base import AnalysisBase, requires_events
 from lisa.utils import memoized
-from trappy.utils import listify

+class StateInt(int):
+    """
+    A tweaked int for :class:`lisa.analysis.tasks.TaskState`
+    """
+    def __new__(cls, value, char="", doc=""):
+        new = super().__new__(cls, value)
+        new.char = char
+        new.__doc__ = doc
+        return new
+
+    def __or__(self, other):
+        char = self.char
+
+        if other.char:
+            char = "|".join(char + other.char)
+
+        return type(self)(
+            int(self) | int(other),
+            char=char)
+
+class TaskState(StateInt, Enum):
+    """
+    Represents the task state as visible in sched_switch
+
+    * Values are extracted from include/linux/sched.h
+    * Chars are extracted from fs/proc/array.c:get_task_state()
+    """
+    # pylint: disable=bad-whitespace
+    TASK_RUNNING         = 0x0000, "R", "Running"
+    TASK_INTERRUPTIBLE   = 0x0001, "S", "Sleeping"
+    TASK_UNINTERRUPTIBLE = 0x0002, "D", "Disk sleep"
+    # __ has a special meaning in Python so let's not do that
+    TASK_STOPPED         = 0x0004, "T", "Stopped"
+    TASK_TRACED          = 0x0008, "t", "Tracing stop"
+
+    EXIT_DEAD            = 0x0010, "X", "Dead"
+    EXIT_ZOMBIE          = 0x0020, "Z", "Zombie"
+
+    # Apparently not visible in traces
+    # EXIT_TRACE           = (EXIT_ZOMBIE[0] | EXIT_DEAD[0])
+
+    TASK_PARKED          = 0x0040, "P", "Parked"
+    TASK_DEAD            = 0x0080, "I", "Idle"
+    TASK_WAKEKILL        = 0x0100
+    TASK_WAKING          = 0x0200, "W", "Waking" # LISA-only char definition
+    TASK_NOLOAD          = 0x0400
+    TASK_NEW             = 0x0800
+    TASK_STATE_MAX       = 0x1000
+
+    # LISA-only, used to differentiate runnable (R) vs running (A)
+    TASK_ACTIVE          = 0x2000, "A", "Active"
+
+    @classmethod
+    def list_reported_states(cls):
+        """
+        List the states that can be reported in a ``sched_switch`` trace
+
+        See include/linux/sched.h:TASK_REPORT
+        """
+        return [state for state in list(cls) if state <= cls.TASK_DEAD]
+
+    # Could use IntFlag instead once we move to Python 3.6
+    @classmethod
+    def sched_switch_str(cls, value):
+        """
+        Get the task state string that would be used in a ``sched_switch`` event
+
+        :param value: The task state value
+        :type value: int
+
+        Tries to emulate what is done in include/trace/events:TRACE_EVENT(sched_switch)
+        """
+        if any([value & state.value for state in cls.list_reported_states()]):
+            res = "|".join([state.char for state in cls.list_reported_states()
+                            if state.value & value])
+        else:
+            res = cls.TASK_RUNNING.char
+
+        # Flag the presence of unreportable states with a "+"
+        if any([value & state.value for state in list(cls)
+                if state not in cls.list_reported_states()]):
+            res += "+"
+
+        return res

 class TasksAnalysis(AnalysisBase):
     """

     def __init__(self, trace):
         super(TasksAnalysis, self).__init__(trace)

 ###############################################################################
 # DataFrame Getter Methods
 ###############################################################################

-    def df_top_big_tasks(self, min_samples=100, min_utilization=None):
+    @requires_events(['sched_wakeup'])
+    def df_tasks_wakeups(self):
         """
-        Tasks which had 'utilization' samples bigger than the specified
-        threshold
+        The number of wakeups per task

-        :param min_samples: minumum number of samples over the min_utilization
-        :type min_samples: int
+        :returns: a :class:`pandas.DataFrame` with:

+          * Task PIDs as index
+          * A ``wakeups`` column (The number of wakeups)
+        """
+        df = self._trace.df_events('sched_wakeup')

+        wakeups = df.groupby('pid').count()["comm"]
+        df = pd.DataFrame(wakeups).rename(columns={"comm" : "wakeups"})
+        df["comm"] = df.index.map(self._trace.get_task_by_pid)

+        return df

+    @requires_events(df_tasks_wakeups.required_events)
     def df_top_wakeup(self, min_wakeups=100):
         """
         Tasks which wakeup more frequently than a specified threshold. 
-        :param 
min_utilization: minimum utilization used to filter samples - default: capacity of a little cluster - :type min_utilization: int + * Task PIDs as index + * A ``wakeups`` column (The number of wakeups) """ - if self.df_load() is None: - self.get_logger().warning('No trace events for task signals, plot DISABLED') - return None - - if min_utilization is None: - min_utilization = self._little_cap - - # Get utilization samples >= min_utilization - df = self.df_load() - big_tasks_events = df[df.util_avg > min_utilization] - if not len(big_tasks_events): - self.get_logger().warning('No tasks with with utilization samples > %d', - min_utilization) - return None - - # Report the number of tasks which match the min_utilization condition - big_tasks = big_tasks_events.pid.unique() - self.get_logger().info('%5d tasks with samples of utilization > %d', - len(big_tasks), min_utilization) + df = self._trace.df_events('sched_wakeup') - # Compute number of samples above threshold - desc = big_tasks_events.groupby('pid').describe(include=['object']) - if isinstance(desc.index, pd.MultiIndex): - # We must be running on a pre-0.20.0 version of pandas. - # unstack will convert the old output format to the new. - # http://pandas.pydata.org/pandas-docs/version/0.20/whatsnew.html#groupby-describe-formatting - desc = desc.unstack() - big_tasks_stats = desc['comm'].sort_values(by=['count'], ascending=False) - - # Filter for number of occurrences - big_tasks_stats = big_tasks_stats[big_tasks_stats['count'] > min_samples] - if not len(big_tasks_stats): - self.get_logger().warning(' but none with more than %d samples', - min_samples) - return None - - self.get_logger().info(' %d with more than %d samples', - len(big_tasks_stats), min_samples) - - # Add task name column - big_tasks_stats['comm'] = big_tasks_stats.index.map( - lambda pid: self._trace.getTaskByPid(pid)) + wakeups = df.groupby('pid').count()["comm"] + df = pd.DataFrame(wakeups).rename(columns={"comm" : "wakeups"}) + df["comm"] = df.index.map(self._trace.get_task_by_pid) - # Filter columns of interest - big_tasks_stats = big_tasks_stats[['count', 'comm']] - big_tasks_stats.rename(columns={'count': 'samples'}, inplace=True) - - return big_tasks_stats + return df + @requires_events(df_tasks_wakeups.required_events) def df_top_wakeup(self, min_wakeups=100): """ Tasks which wakeup more frequently than a specified threshold. 
@@ -115,53 +151,33 @@ class TasksAnalysis(AnalysisBase):
         :param min_wakeups: minimum number of wakeups
         :type min_wakeups: int
         """
-        if not self._trace.hasEvents('sched_wakeup'):
-            self.get_logger().warning('Events [sched_wakeup] not found')
-            return None
-
-        df = self._trace.df_events('sched_wakeup')
-
-        # Compute number of wakeups above threshold
-        wkp_tasks_stats = df.groupby('pid').describe(include=['object'])
-        wkp_tasks_stats = wkp_tasks_stats.unstack()['comm']\
-                          .sort_values(by=['count'], ascending=False)
-
-        # Filter for number of occurrences
-        wkp_tasks_stats = wkp_tasks_stats[
-            wkp_tasks_stats['count'] > min_wakeups]
-        if not len(df):
-            self.get_logger().warning('No tasks with more than %d wakeups',
-                              len(wkp_tasks_stats))
-            return None
-        self.get_logger().info('%5d tasks with more than %d wakeups',
-                         len(df), len(wkp_tasks_stats))
-
-        # Add task name column
-        wkp_tasks_stats['comm'] = wkp_tasks_stats.index.map(
-            lambda pid: self._trace.getTaskByPid(pid))
+        df = self.df_tasks_wakeups()

-        # Filter columns of interest
-        wkp_tasks_stats = wkp_tasks_stats[['count', 'comm']]
-        wkp_tasks_stats.rename(columns={'count': 'samples'}, inplace=True)
+        # Compute number of samples above threshold
+        df = df[df.wakeups > min_wakeups]
+        df = df.sort_values(by="wakeups", ascending=False)

-        return wkp_tasks_stats
+        return df

+    @requires_events(['sched_switch'])
     def df_rt_tasks(self, min_prio=100):
         """
         Tasks with RT priority

-        NOTE: priorities uses scheduler values, thus: the lower the value the
-              higher is the task priority.
-              RT Priorities: [ 0..100]
-              FAIR Priorities: [101..120]
+        .. note:: priorities use scheduler values, thus the lower the value,
+          the higher the task priority.
+          RT Priorities: [ 0..100]
+          FAIR Priorities: [101..120]

-        :param min_prio: minumum priority
+        :param min_prio: minimum priority
         :type min_prio: int
-        """
-        if not self._trace.hasEvents('sched_switch'):
-            self.get_logger().warning('Events [sched_switch] not found')
-            return None

+        :returns: a :class:`pandas.DataFrame` with:
+
+          * Task PIDs as index
+          * A ``prio`` column (The priority of the task)
+          * A ``comm`` column (The name of the task)
+        """
         df = self._trace.df_events('sched_switch')

         # Filters tasks which have a priority bigger than threshold

         # Filter columns of interest
         rt_tasks = df[['next_pid', 'next_prio']]
-
-        # Remove all duplicateds
         rt_tasks = rt_tasks.drop_duplicates()

         # Order by priority
-        rt_tasks.sort_values(by=['next_prio', 'next_pid'], ascending=True,
-                             inplace=True)
-        rt_tasks.rename(columns={'next_pid': 'pid', 'next_prio': 'prio'},
-                        inplace=True)
+        rt_tasks.sort_values(
+            by=['next_prio', 'next_pid'], ascending=True, inplace=True)
+        rt_tasks.rename(
+            columns={'next_pid': 'pid', 'next_prio': 'prio'}, inplace=True)

-        # Set PID as index
         rt_tasks.set_index('pid', inplace=True)
-
-        # Add task name column
-        rt_tasks['comm'] = rt_tasks.index.map(
-            lambda pid: self._trace.getTaskByPid(pid))
+        rt_tasks['comm'] = rt_tasks.index.map(self._trace.get_task_by_pid)

         return rt_tasks

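# (Illustration only, not part of the patch: a minimal usage sketch of the
#  getter above, assuming a parsed trace. Scheduler priorities at or below
#  100 are RT:)
#
#     tasks = TasksAnalysis(trace)
#     rt_df = tasks.df_rt_tasks(min_prio=100)
#     print(rt_df.head())
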
-    def df_load(self):
+    @requires_events(['sched_switch', 'sched_wakeup'])
+    def df_task_states(self, task):
         """
-        Get a DataFrame with the scheduler's per-task load-tracking signals
+        DataFrame of the task's state update events
+
+        :param task: The task's name or PID
+        :type task: int or str

-        Parse the relevant trace event and return a DataFrame with the
-        scheduler's load tracking update events for each task. 
+
-        :returns: DataFrame with at least the following columns:
-                  'comm', 'pid', 'load_avg', 'util_avg'.
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * A ``target_cpu`` column (the CPU where the task has been scheduled).
+            Will be ``NaN`` for non-wakeup events
+          * A ``curr_state`` column (the current task state, see :class:`~TaskState`)
+          * A ``next_state`` column (the next task state, see :class:`~TaskState`)
+          * A ``delta`` column (the duration for which the task will remain in
+            this state)
         """
-        df = None
+        pid = self._trace.get_task_pid(task)

-        if 'sched_load_avg_task' in self._trace.available_events:
-            df = self._trace.df_events('sched_load_avg_task')
+        wk_df = self._trace.df_events('sched_wakeup')
+        sw_df = self._trace.df_events('sched_switch')

-        elif 'sched_load_se' in self._trace.available_events:
-            df = self._trace.df_events('sched_load_se')
-            df = df.rename(columns={'util': 'util_avg', 'load': 'load_avg'})
-            # In sched_load_se, PID shows -1 for task groups.
-            df = df[df.pid != -1]
+        task_wakeup = wk_df[wk_df.pid == pid][['target_cpu', '__cpu']]
+        task_wakeup['curr_state'] = TaskState.TASK_WAKING.char

-        if not self._trace.has_big_little:
-            return df
+        task_switches_df = sw_df[
+            (sw_df.prev_pid == pid) |
+            (sw_df.next_pid == pid)
+        ][['__cpu', 'prev_pid', 'prev_state']]

-        df['cluster'] = np.select(
-            [df.cpu.isin(self._trace.plat_info['clusters']['little'])],
-            ['LITTLE'], 'big')
+        def stringify_row_state(row):
+            if row.prev_pid != pid:
+                # This is a switch-in event
+                # (we don't care about the status of a task we are replacing)
+                return TaskState.TASK_ACTIVE.char

-        if 'nrg-model' in self._trace.plat_info:
-            # Add a column which represents the max capacity of the smallest
-            # clustre which can accomodate the task utilization
-            little_cap = self._trace.plat_info['nrg-model']['little']['cpu']['cap_max']
-            big_cap = self._trace.plat_info['nrg-model']['big']['cpu']['cap_max']
-            df['min_cluster_cap'] = df.util_avg.map(
-                lambda util_avg: big_cap if util_avg > little_cap else little_cap
-            )
+            return TaskState.sched_switch_str(row.prev_state)

-        return df
+        task_switches_df.prev_state = task_switches_df.apply(
+            stringify_row_state, axis=1)
+
+        task_switches_df = task_switches_df.drop(columns=["prev_pid"])
+
+        task_switches_df.rename(columns={'prev_state' : 'curr_state'}, inplace=True)
+
+        # Integer values are preferred here, otherwise the whole column
+        # is converted to float64
+        task_switches_df['target_cpu'] = -1
+
+        task_state_df = task_wakeup.append(task_switches_df, sort=True).sort_index()
+
+        task_state_df.rename(columns={'__cpu' : 'cpu'}, inplace=True)
+        task_state_df = task_state_df[['target_cpu', 'cpu', 'curr_state']]
+        task_state_df['next_state'] = task_state_df.curr_state.shift(-1)
+        self._trace.add_events_deltas(task_state_df, inplace=True)
+
+        return task_state_df
+
+    @requires_events(df_task_states.required_events)
+    def df_task_total_residency(self, task):
+        """
+        DataFrame of a task's execution time on each CPU
+
+        :param task: the task to report runtimes for
+        :type task: int or str
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * CPU IDs as index
+          * A ``runtime`` column (the time the task spent being active)
+        """
+        cpus = set(range(self._trace.plat_info['cpus-count']))
+
+        df = self.df_task_states(task)
+        df = df[df.curr_state == TaskState.TASK_ACTIVE.char]
+
+        residency_df = pd.DataFrame(df.groupby("cpu")["delta"].sum())
+        residency_df.rename(columns={"delta" : "runtime"}, inplace=True)
+
+        cpus_present = set(residency_df.index.unique())
+
+        for cpu in cpus.difference(cpus_present):
+            residency_df.loc[cpu] = 0.
+
+        residency_df.sort_index(inplace=True)
+
+        return residency_df
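
# (Illustration only, not part of the patch: chaining the two getters above
#  on a parsed trace; the task name is hypothetical:)
#
#     tasks = TasksAnalysis(trace)
#     states = tasks.df_task_states("my-task")
#     # Total time spent in each state, summing the per-row deltas
#     print(states.groupby("curr_state")["delta"].sum())
#     print(tasks.df_task_total_residency("my-task"))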

 ###############################################################################
 # Plotting Methods
 ###############################################################################

-    def plot_tasks(self, tasks, signals=None):
+    @requires_events(['sched_switch'])
+    def plot_task_residency(self, task, filepath=None):
         """
-        Generate a common set of useful plots for each of the specified tasks
-
-        This method allows to filter which signals should be plot, if data are
-        available in the input trace. The list of signals supported are:
-        Tasks signals plot:
-                load_avg, util_avg, boosted_util, sched_overutilized
-        Tasks residencies on CPUs:
-                residencies, sched_overutilized
-        Tasks PELT signals:
-                load_sum, util_sum, period_contrib, sched_overutilized
-
-        At least one of the previous signals must be specified to get a valid
-        plot.
-
-        Addidional custom signals can be specified and they will be represented
-        in the "Task signals plots" if they represent valid keys of the task
-        load/utilization trace event (e.g. sched_load_avg_task).
-
-        Note:
-            sched_overutilized: enable the plotting of overutilization bands on
-                                top of each subplot
-            residencies: enable the generation of the CPUs residencies plot
-
-        :param tasks: the list of task names and/or PIDs to plot.
-                      Numerical PIDs and string task names can be mixed
-                      in the same list.
-        :type tasks: list(str) or list(int)
-
-        :param signals: list of signals (and thus plots) to generate
-                        default: all the plots and signals available in the
-                        current trace
-        :type signals: list(str)
+        Plot which CPUs the task ran on over time
+
+        :param task: The task's name or PID
+        :type task: int or str
         """
-        if not signals:
-            signals = ['load_avg', 'util_avg', 'boosted_util',
-                       'sched_overutilized',
-                       'load_sum', 'util_sum', 'period_contrib',
-                       'residencies']
-
-        # Check for the minimum required signals to be available
-        if self.df_load() is None:
-            self.get_logger().warning('No trace events for task signals, plot DISABLED')
-            return
-
-        # Defined list of tasks to plot
-        if tasks and \
-           not isinstance(tasks, str) and \
-           not isinstance(tasks, list):
-            raise ValueError('Wrong format for tasks parameter')
-
-        if tasks:
-            tasks_to_plot = listify(tasks)
+        fig, axis = self.setup_plot()
+
+        pid = self._trace.get_task_pid(task)
+
+        sw_df = self._trace.df_events("sched_switch")
+        sw_df = sw_df[sw_df.next_pid == pid]
+
+        if "freq-domains" in self._trace.plat_info:
+            # If we are aware of frequency domains, use one color per domain
+            for domain in self._trace.plat_info["freq-domains"]:
+                sw_df[sw_df["__cpu"].isin(domain)]["__cpu"].plot(
+                    ax=axis, style='+', label="Task running in domain {}".format(domain))
         else:
-            raise ValueError('No tasks to plot specified')
-
-        # Compute number of plots to produce
-        plots_count = 0
-        plots_signals = [
-            # Fist plot: task's utilization
-            {'load_avg', 'util_avg', 'boosted_util'},
-            # Second plot: task residency
-            {'residencies'},
-            # Third plot: tasks's load
-            {'load_sum', 'util_sum', 'period_contrib'}
-        ]
-        hr = []
-        ysize = 0
-        for plot_id, signals_to_plot in enumerate(plots_signals):
-            signals_to_plot = signals_to_plot.intersection(signals)
-            if len(signals_to_plot):
-                plots_count = plots_count + 1
-                # Use bigger size only for the first plot
-                hr.append(3 if plot_id == 0 else 1)
-                ysize = ysize + (8 if plot_id else 4)
-
-        # Grid
-        gs = gridspec.GridSpec(plots_count, 1, height_ratios=hr)
-        gs.update(wspace=0.1, hspace=0.1)
-
-        # Build list of all PIDs for each 
task_name to plot - pids_to_plot = [] - for task in tasks_to_plot: - # Add specified PIDs to the list - if isinstance(task, int): - pids_to_plot.append(task) - continue - # Otherwise: add all the PIDs for task with the specified name - pids_to_plot.extend(self._trace.getTaskByName(task)) - - for tid in pids_to_plot: - savefig = False - - task_name = self._trace.getTaskByPid(tid) - self.get_logger().info('Plotting [%d:%s]...', tid, task_name) - plot_id = 0 - - # For each task create a figure with plots_count plots - plt.figure(figsize=(16, ysize)) - plt.suptitle('Task Signals', - y=.94, fontsize=16, horizontalalignment='center') - - # Plot load and utilization - signals_to_plot = {'load_avg', 'util_avg', 'boosted_util'} - signals_to_plot = list(signals_to_plot.intersection(signals)) - if len(signals_to_plot) > 0: - axes = plt.subplot(gs[plot_id, 0]) - axes.set_title('Task [{0:d}:{1:s}] Signals' - .format(tid, task_name)) - plot_id = plot_id + 1 - is_last = (plot_id == plots_count) - self._plot_task_signals(axes, tid, signals, is_last) - savefig = True - - # Plot CPUs residency - signals_to_plot = {'residencies'} - signals_to_plot = list(signals_to_plot.intersection(signals)) - if len(signals_to_plot) > 0: - if not self._trace.has_big_little: - self.get_logger().warning( - 'No big.LITTLE platform data, residencies plot disabled') - else: - axes = plt.subplot(gs[plot_id, 0]) - axes.set_title( - 'Task [{0:d}:{1:s}] Residency (green: LITTLE, red: big)' - .format(tid, task_name) - ) - plot_id = plot_id + 1 - is_last = (plot_id == plots_count) - if 'sched_overutilized' in signals: - signals_to_plot.append('sched_overutilized') - self._plot_task_residencies(axes, tid, signals_to_plot, is_last) - savefig = True - - # Plot PELT signals - signals_to_plot = {'load_sum', 'util_sum', 'period_contrib'} - signals_to_plot = list(signals_to_plot.intersection(signals)) - if len(signals_to_plot) > 0: - axes = plt.subplot(gs[plot_id, 0]) - axes.set_title('Task [{0:d}:{1:s}] PELT Signals' - .format(tid, task_name)) - plot_id = plot_id + 1 - if 'sched_overutilized' in signals: - signals_to_plot.append('sched_overutilized') - self._plot_task_pelt(axes, tid, signals_to_plot) - savefig = True - - if not savefig: - self.get_logger().warning('Nothing to plot for %s', task_name) - continue - - # Save generated plots into datadir - if isinstance(task_name, list): - task_name = re.sub('[:/]', '_', task_name[0]) - else: - task_name = re.sub('[:/]', '_', task_name) - figname = '{}/{}task_util_{}_{}.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix, - tid, task_name) - pl.savefig(figname, bbox_inches='tight') - - def plot_big_tasks(self, max_tasks=10, min_samples=100, - min_utilization=None): - """ - For each big task plot utilization and show the smallest cluster - capacity suitable for accommodating task utilization. 
+ sw_df["__cpu"].plot(ax=axis, style='+') + + # Add an extra CPU lane to make room for the legend + ylabels = [''] + [str(n) for n in range(self._trace.plat_info['cpus-count'])] + axis.set_yticklabels(ylabels) + + axis.set_title("CPU residency of task \"{}\"".format(task)) + axis.set_ylabel('CPUs') + axis.grid(True) + axis.legend() + + self._trace.analysis.status.plot_overutilized(axis) - :param max_tasks: maximum number of tasks to consider - :type max_tasks: int + self.save_plot(fig, filepath) - :param min_samples: minumum number of samples over the min_utilization - :type min_samples: int + return axis - :param min_utilization: minimum utilization used to filter samples - default: capacity of a little cluster - :type min_utilization: int + @requires_events(df_task_total_residency.required_events) + def plot_task_total_residency(self, task, filepath=None): """ + Plot a task's total time spent on each CPU - # Get PID of big tasks - big_frequent_task_df = self.df_top_big_tasks( - min_samples, min_utilization) - if big_frequent_task_df is None: - # (Logged already) - return - - if max_tasks > 0: - big_frequent_task_df = big_frequent_task_df.head(max_tasks) - big_frequent_task_pids = big_frequent_task_df.index.values - - big_frequent_tasks_count = len(big_frequent_task_pids) - if big_frequent_tasks_count == 0: - self.get_logger().warning('No big/frequent tasks to plot') - return - - # Get the list of events for all big frequent tasks - df = self.df_load() - big_frequent_tasks_events = df[df.pid.isin(big_frequent_task_pids)] - - # Define axes for side-by-side plottings - fig, axes = plt.subplots(big_frequent_tasks_count, 1, - figsize=(16, big_frequent_tasks_count*4)) - plt.subplots_adjust(wspace=0.1, hspace=0.2) - - plot_idx = 0 - for pid, group in big_frequent_tasks_events.groupby('pid'): - - # # Build task names (there could be multiple, during the task lifetime) - task_name = 'Task [%d:%s]'.format(pid, self._trace.getTaskByPid(pid)) - - # Plot title - if big_frequent_tasks_count == 1: - ax = axes - else: - ax = axes[plot_idx] - ax.set_title(task_name) - - # Left axis: utilization - ax = group.plot(y=['util_avg', 'min_cluster_cap'], - style=['r.', '-b'], - drawstyle='steps-post', - linewidth=1, - ax=ax) - ax.set_xlim(self._trace.x_min, self._trace.x_max) - ax.set_ylim(0, 1100) - ax.set_ylabel('util_avg') - ax.set_xlabel('') - ax.grid(True) - self._trace.analysis.status.plot_overutilized(ax) - - plot_idx += 1 - - ax.set_xlabel('Time [s]') - - self.get_logger().info('Tasks which have been a "utilization" of %d for at least %d samples', - self._little_cap, min_samples) - - def plot_wakeup(self, max_tasks=10, min_wakeups=0, per_cluster=False): + :param task: The task's name or PID + :type task: str or int """ - Show waking up tasks over time and newly forked tasks in two separate - plots. 
+        fig, axis = self.setup_plot(height=8)

-        :param max_tasks: maximum number of tasks to consider
-        :param max_tasks: int
+        df = self.df_task_total_residency(task)

-        :param min_wakeups: minimum number of wakeups of each task
-        :type min_wakeups: int
+        df["runtime"].plot.bar(ax=axis)
+        axis.set_title("CPU residency of task \"{}\"".format(task))
+        axis.set_xlabel("CPU")
+        axis.set_ylabel("Runtime (s)")
+        axis.grid(True)
+
+        self.save_plot(fig, filepath)
+
+        return axis

-        :param per_cluster: if True get per-cluster wakeup events
-        :type per_cluster: bool
+    def _df_discretize_series(self, series, time_delta, name):
         """
-        if per_cluster is True and \
-           not self._trace.hasEvents('sched_wakeup_new'):
-            self.get_logger().warning('Events [sched_wakeup_new] not found, '
-                                      'plots DISABLED!')
-            return
-        elif not self._trace.hasEvents('sched_wakeup') and \
-             not self._trace.hasEvents('sched_wakeup_new'):
-            self.get_logger().warning('Events [sched_wakeup, sched_wakeup_new] not found, '
-                                      'plots DISABLED!')
-            return
-
-        # Define axes for side-by-side plottings
-        fig, axes = plt.subplots(2, 1, figsize=(14, 5))
-        plt.subplots_adjust(wspace=0.2, hspace=0.3)
-
-        if per_cluster:
-
-            # Get per cluster wakeup events
-            df = self._trace.df_events('sched_wakeup_new')
-            big_frequent = df.target_cpu.isin(self._big_cpus)
-            ntbc = df[big_frequent]
-            ntbc_count = len(ntbc)
-            little_frequent = df.target_cpu.isin(self._little_cpus)
-            ntlc = df[little_frequent];
-            ntlc_count = len(ntlc)
-
-            self.get_logger().info('%5d tasks forked on big cluster (%3.1f %%)',
-                                   ntbc_count,
-                                   100. * ntbc_count / (ntbc_count + ntlc_count))
-            self.get_logger().info('%5d tasks forked on LITTLE cluster (%3.1f %%)',
-                                   ntlc_count,
-                                   100. * ntlc_count / (ntbc_count + ntlc_count))
-
-            ax = axes[0]
-            ax.set_title('Tasks Forks on big CPUs');
-            ntbc.pid.plot(style=['g.'], ax=ax);
-            ax.set_xlim(self._trace.x_min, self._trace.x_max);
-            ax.set_xticklabels([])
-            ax.set_xlabel('')
-            ax.grid(True)
-            self._trace.analysis.status.plot_overutilized(ax)
-
-            ax = axes[1]
-            ax.set_title('Tasks Forks on LITTLE CPUs');
-            ntlc.pid.plot(style=['g.'], ax=ax);
-            ax.set_xlim(self._trace.x_min, self._trace.x_max);
-            ax.grid(True)
-            self._trace.analysis.status.plot_overutilized(ax)
-
-            return
-
-        # Keep events of defined big tasks
-        wkp_task_pids = self.df_top_wakeup(min_wakeups)
-        if len(wkp_task_pids):
-            wkp_task_pids = wkp_task_pids.index.values[:max_tasks]
-            self.get_logger().info('Plotting %d frequent wakeup tasks',
-                                   len(wkp_task_pids))
-
-        ax = axes[0]
-        ax.set_title('Tasks WakeUps Events')
-        df = self._trace.df_events('sched_wakeup')
-        if len(df):
-            df = df[df.pid.isin(wkp_task_pids)]
-            df.pid.astype(int).plot(style=['b.'], ax=ax)
-            ax.set_xlim(self._trace.x_min, self._trace.x_max)
-            ax.set_xticklabels([])
-            ax.set_xlabel('')
-            ax.grid(True)
-            self._trace.analysis.status.plot_overutilized(ax)
-
-        ax = axes[1]
-        ax.set_title('Tasks Forks Events')
-        df = self._trace.df_events('sched_wakeup_new')
-        if len(df):
-            df = df[df.pid.isin(wkp_task_pids)]
-            df.pid.astype(int).plot(style=['r.'], ax=ax)
-            ax.set_xlim(self._trace.x_min, self._trace.x_max)
-            ax.grid(True)
-            self._trace.analysis.status.plot_overutilized(ax)
-
-    def plot_big_tasks_vs_capacity(self, min_samples=1,
-                                   min_utilization=None, big_cluster=True):
+        Discretize the contents of ``series`` into ``time_delta`` buckets
+        """
+        left = self._trace.x_min
+        data = []
+        index = []
+        for right in np.arange(left + time_delta, self._trace.x_max, time_delta):
+            index.append(left)
+            data.append(series[left:right].count())
+
left = right + + return pd.DataFrame(data=data, index=index, columns=[name]) + + def _plot_cpu_heatmap(self, x, y, xbins, colorbar_label, **kwargs): + """ + Plot some data in a heatmap-style 2d histogram """ - Draw a plot that shows whether tasks are placed on the correct cluster - based on their utilization and cluster capacity. Green dots mean the - task was placed on the correct cluster, Red means placement was wrong + nr_cpus = self._trace.cpus_count + fig, axis = self.setup_plot(height=min(4, nr_cpus // 2), width=20) - :param min_samples: minumum number of samples over the min_utilization - :type min_samples: int + _, _, _, img = axis.hist2d(x, y, bins=[xbins, nr_cpus], **kwargs) + fig.colorbar(img, label=colorbar_label) - :param min_utilization: minimum utilization used to filter samples - default: capacity of a little cluster - :type min_utilization: int + return fig, axis - :param big_cluster: - :type big_cluster: bool + @requires_events(["sched_wakeup"]) + def plot_tasks_wakeups(self, target_cpus=None, time_delta=0.01, filepath=None): """ + Plot task wakeups over time - if not self._trace.hasEvents('cpu_frequency'): - self.get_logger().warning('Events [cpu_frequency] not found') - return + :param target_cpus: + :type target_cpus: - # Get all utilization update events - df = self.df_load() - if df is None: - self.get_logger().warning('No trace events for task signals, plot DISABLED') - return + :param time_delta: The discretization delta for summing up wakeups in a + given time delta. + :type time_delta: float + """ + fig, axis = self.setup_plot() - if big_cluster: - cluster_correct = 'big' - cpus = self._big_cpus - else: - cluster_correct = 'LITTLE' - cpus = self._little_cpus - - # Keep events of defined big tasks - big_task_pids = self.df_top_big_tasks( - min_samples, min_utilization) - if big_task_pids is not None: - big_task_pids = big_task_pids.index.values - df = df[df.pid.isin(big_task_pids)] - if not df.size: - self.get_logger().warning('No events for tasks with more then %d utilization ' - 'samples bigger than %d, plots DISABLED!') - return - - fig, axes = plt.subplots(2, 1, figsize=(14, 5)) - plt.subplots_adjust(wspace=0.2, hspace=0.3) - - # Add column of expected cluster depending on: - # a) task utilization value - # b) capacity of the selected cluster - bu_bc = ((df['util_avg'] > self._little_cap) & - (df['cpu'].isin(self._big_cpus))) - su_lc = ((df['util_avg'] <= self._little_cap) & - (df['cpu'].isin(self._little_cpus))) - - # The Cluster CAPacity Matches the UTILization (ccap_mutil) iff: - # - tasks with util_avg > little_cap are running on a BIG cpu - # - tasks with util_avg <= little_cap are running on a LITTLe cpu - df.loc[:,'ccap_mutil'] = np.select([(bu_bc | su_lc)], [True], False) - - df_freq = self._trace.df_events('cpu_frequency') - df_freq = df_freq[df_freq.cpu == cpus[0]] - - ax = axes[0] - ax.set_title('Tasks Utilization vs Allocation') - for ucolor, umatch in zip('gr', [True, False]): - cdata = df[df['ccap_mutil'] == umatch] - if len(cdata) > 0: - cdata['util_avg'].plot(ax=ax, - style=[ucolor+'.'], legend=False) - ax.set_xlim(self._trace.x_min, self._trace.x_max) - ax.set_xticklabels([]) - ax.set_xlabel('') - ax.grid(True) - self._trace.analysis.status.plot_overutilized(ax) - - ax = axes[1] - ax.set_title('Frequencies on "{}" cluster'.format(cluster_correct)) - df_freq['frequency'].plot(style=['-b'], ax=ax, drawstyle='steps-post') - ax.set_xlim(self._trace.x_min, self._trace.x_max); - ax.grid(True) - self._trace.analysis.status.plot_overutilized(ax) - - 
legend_y = axes[0].get_ylim()[1] - axes[0].annotate('Utilization-Capacity Matches', - xy=(0, legend_y), - xytext=(-50, 45), textcoords='offset points', - fontsize=18) - axes[0].annotate('Task schduled (green) or not (red) on min cluster', - xy=(0, legend_y), - xytext=(-50, 25), textcoords='offset points', - fontsize=14) + df = self._trace.df_events("sched_wakeup") + if target_cpus: + df = df[df.target_cpu.isin(target_cpus)] -############################################################################### -# Utility Methods -############################################################################### + df = self._df_discretize_series(df["target_cpu"], time_delta, "Wakeup count") + df.plot(ax=axis, legend=False) + + axis.set_title("Number of task wakeups within {}s windows".format(time_delta)) + + self.save_plot(fig, filepath) + + return axis + + @requires_events(["sched_wakeup"]) + def plot_tasks_wakeups_heatmap(self, xbins=100, colormap=None, filepath=None): + """ + :param xbins: Number of x-axis bins, i.e. in how many slices should + time be arranged + :type xbins: int - def _plot_task_signals(self, axes, tid, signals, is_last=False): + :param colormap: The name of a colormap (see + https://matplotlib.org/users/colormaps.html), or a Colormap object + :type colormap: str or matplotlib.colors.Colormap """ - For task with ID `tid` plot the specified signals. - :param axes: axes over which to generate the plot - :type axes: :mod:`matplotlib.axes.Axes` + df = self._trace.df_events("sched_wakeup") - :param tid: task ID - :type tid: int + fig, axis = self._plot_cpu_heatmap( + df.index, df.target_cpu, xbins, "Number of wakeups", cmap=colormap) - :param signals: signals to be plot - :param signals: list(str) + axis.set_title("Tasks wakeups over time") - :param is_last: if True this is the last plot - :type is_last: bool + self.save_plot(fig, filepath) + + return axis + + @requires_events(["sched_wakeup_new"]) + def plot_tasks_forks(self, target_cpus=None, time_delta=0.01, filepath=None): """ - # Get dataframe for the required task - util_df = self.df_load() - if util_df is None: - self.get_logger().warning('No trace events for task signals, plot DISABLED') - return - - # Plot load and util - signals_to_plot = set(signals).difference({'boosted_util'}) - for signal in signals_to_plot: - if signal not in util_df.columns: - continue - data = util_df[util_df.pid == tid][signal] - data.plot(ax=axes, drawstyle='steps-post', legend=True) - - # Plot boost utilization if available - if 'boosted_util' in signals and \ - self._trace.hasEvents('sched_boost_task'): - boost_df = self._trace.df_events('sched_boost_task') - data = boost_df[boost_df.pid == tid][['boosted_util']] - if len(data): - data.plot(ax=axes, style=['y-'], drawstyle='steps-post') - else: - task_name = self._trace.getTaskByPid(tid) - self.get_logger().warning('No "boosted_util" data for task [%d:%s]', - tid, task_name) - - # Add Capacities data if avilable - if 'nrg-model' in self._trace.plat_info: - nrg_model = self._trace.plat_info['nrg-model'] - max_lcap = nrg_model['little']['cpu']['cap_max'] - max_bcap = nrg_model['big']['cpu']['cap_max'] - tip_lcap = 0.8 * max_lcap - tip_bcap = 0.8 * max_bcap - self.get_logger().debug( - 'LITTLE capacity tip/max: %d/%d, big capacity tip/max: %d/%d', - tip_lcap, max_lcap, tip_bcap, max_bcap - ) - axes.axhline(tip_lcap, color='y', linestyle=':', linewidth=2) - axes.axhline(max_lcap, color='y', linestyle='--', linewidth=2) - axes.axhline(tip_bcap, color='r', linestyle=':', linewidth=2) - 
axes.axhline(max_bcap, color='r', linestyle='--', linewidth=2) - - axes.set_ylim(0, 1100) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.grid(True) - if not is_last: - axes.set_xticklabels([]) - axes.set_xlabel('') - if 'sched_overutilized' in signals: - self._trace.analysis.status.plot_overutilized(axes) - - def _plot_task_residencies(self, axes, tid, signals, is_last=False): + Plot task forks over time + + :param target_cpus: + :type target_cpus: + + :param time_delta: The discretization delta for summing up forks in a + given time delta. + :type time_delta: float """ - For task with ID `tid` plot residency information. + fig, axis = self.setup_plot() + + df = self._trace.df_events("sched_wakeup_new") + + if target_cpus: + df = df[df.target_cpu.isin(target_cpus)] - :param axes: axes over which to generate the plot - :type axes: :mod:`matplotlib.axes.Axes` + df = self._df_discretize_series(df["target_cpu"], time_delta, "Forks count") + df.plot(ax=axis, legend=False) - :param tid: task ID - :type tid: int + axis.set_title("Number of task forks within {}s windows".format(time_delta)) - :param signals: signals to be plot - :param signals: list(str) + self.save_plot(fig, filepath) - :param is_last: if True this is the last plot - :type is_last: bool + return axis + + @requires_events(["sched_wakeup_new"]) + def plot_tasks_forks_heatmap(self, xbins=100, colormap=None, filepath=None): """ - util_df = self.df_load() - if util_df is None: - self.get_logger().warning('No trace events for task signals, plot DISABLED') - return - data = util_df[util_df.pid == tid][['cluster', 'cpu']] - for ccolor, clabel in zip('gr', ['LITTLE', 'big']): - cdata = data[data.cluster == clabel] - if len(cdata) > 0: - cdata.plot(ax=axes, style=[ccolor+'+'], legend=False) - # Y Axis - placeholders for legend, acutal CPUs. topmost empty lane - cpus = [str(n) for n in range(self._trace.plat_info['cpus-count'])] - ylabels = [''] + cpus - axes.set_yticklabels(ylabels) - axes.set_ylim(-1, len(cpus)) - axes.set_ylabel('CPUs') - # X Axis - axes.set_xlim(self._trace.x_min, self._trace.x_max) - - axes.grid(True) - if not is_last: - axes.set_xticklabels([]) - axes.set_xlabel('') - if 'sched_overutilized' in signals: - self._trace.analysis.status.plot_overutilized(axes) - - def _plot_task_pelt(self, axes, tid, signals): + :param xbins: Number of x-axis bins, i.e. in how many slices should + time be arranged + :type xbins: int + + :param colormap: The name of a colormap (see + https://matplotlib.org/users/colormaps.html), or a Colormap object + :type colormap: str or matplotlib.colors.Colormap """ - For task with ID `tid` plot PELT-related signals. 
- :param axes: axes over which to generate the plot - :type axes: :mod:`matplotlib.axes.Axes` + df = self._trace.df_events("sched_wakeup_new") - :param tid: task ID - :type tid: int + fig, axis = self._plot_cpu_heatmap( + df.index, df.target_cpu, xbins, "Number of forks", cmap=colormap) - :param signals: signals to be plot - :param signals: list(str) - """ - if not self._trace.hasEvents('sched_load_avg_task'): - self.get_logger().warning( - 'No sched_load_avg_task events, skipping PELT plot') - return - - util_df = self._trace.df_events('sched_load_avg_task') - data = util_df[util_df.pid == tid][['load_sum', - 'util_sum', - 'period_contrib']] - data.plot(ax=axes, drawstyle='steps-post') - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.ticklabel_format(style='scientific', scilimits=(0, 0), - axis='y', useOffset=False) - axes.grid(True) - if 'sched_overutilized' in signals: - self._trace.analysis.status.plot_overutilized(axes) + axis.set_title("Tasks forks over time") + + self.save_plot(fig, filepath) + + return axis # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 -- GitLab From ddba8d79084a2ddc71395b66a51ff8236acffa25 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 20 Nov 2018 16:56:46 +0000 Subject: [PATCH 29/56] analysis: Move idle-related analysis from cpus to idle analysis --- lisa/analysis/cpus.py | 96 ------------------------------------- lisa/analysis/frequency.py | 2 +- lisa/analysis/idle.py | 98 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 96 insertions(+), 100 deletions(-) diff --git a/lisa/analysis/cpus.py b/lisa/analysis/cpus.py index 0207fca0e..8f1598250 100644 --- a/lisa/analysis/cpus.py +++ b/lisa/analysis/cpus.py @@ -17,13 +17,8 @@ """ CPUs Analysis Module """ -import operator -from functools import reduce - import pandas as pd -from trappy.utils import handle_duplicate_index - from lisa.utils import memoized from lisa.analysis.base import AnalysisBase, requires_events @@ -63,97 +58,6 @@ class CpusAnalysis(AnalysisBase): return ctx_sw_df - @requires_events(['cpu_idle']) - def df_cpu_wakeups(self, cpus=None): - """" - Get a DataFrame showing when a CPU was woken from idle - - :param cpus: List of CPUs to find wakeups for. If None, all CPUs. - :type cpus: list(int) or None - - :returns: A :class:`pandas.DataFrame` with - - * A ``cpu`` column (the CPU that woke up at the row index) - """ - cpus = cpus or list(range(self._trace.cpus_count)) - - sr = pd.Series() - for cpu in cpus: - cpu_sr = self._trace.getCPUActiveSignal(cpu) - cpu_sr = cpu_sr[cpu_sr == 1] - cpu_sr = cpu_sr.replace(1, cpu) - sr = sr.append(cpu_sr) - - return pd.DataFrame({'cpu': sr}).sort_index() - - @memoized - @requires_events(['cpu_idle']) - def signal_cpu_active(self, cpu): - """ - Build a square wave representing the active (i.e. 
non-idle) CPU time - - :param cpu: CPU ID - :type cpu: int - - :returns: A :class:`pandas.Series` that equals 1 at timestamps where the - CPU is reported to be non-idle, 0 otherwise - """ - idle_df = self._trace.df_events('cpu_idle') - cpu_df = idle_df[idle_df.cpu_id == cpu] - - cpu_active = cpu_df.state.apply( - lambda s: 1 if s == -1 else 0 - ) - - start_time = 0.0 - if not self._trace.ftrace.normalized_time: - start_time = self._trace.ftrace.basetime - - if cpu_active.empty: - cpu_active = pd.Series([0], index=[start_time]) - elif cpu_active.index[0] != start_time: - entry_0 = pd.Series(cpu_active.iloc[0] ^ 1, index=[start_time]) - cpu_active = pd.concat([entry_0, cpu_active]) - - # Fix sequences of wakeup/sleep events reported with the same index - return handle_duplicate_index(cpu_active) - - @requires_events(signal_cpu_active.required_events) - def signal_cluster_active(self, cluster): - """ - Build a square wave representing the active (i.e. non-idle) cluster time - - :param cluster: list of CPU IDs belonging to a cluster - :type cluster: list(int) - - :returns: A :class:`pandas.Series` that equals 1 at timestamps where at - least one CPU is reported to be non-idle, 0 otherwise - """ - active = self.signal_cpu_active(cluster[0]).to_frame(name=cluster[0]) - for cpu in cluster[1:]: - active = active.join( - self.signal_cpu_active(cpu).to_frame(name=cpu), - how='outer' - ) - - active.fillna(method='ffill', inplace=True) - # There might be NaNs in the signal where we got data from some CPUs - # before others. That will break the .astype(int) below, so drop rows - # with NaN in them. - active.dropna(inplace=True) - - # Cluster active is the OR between the actives on each CPU - # belonging to that specific cluster - cluster_active = reduce( - operator.or_, - [cpu_active.astype(int) for _, cpu_active in - active.items()] - ) - - return cluster_active - - - ############################################################################### # Plotting Methods ############################################################################### diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py index 1f8793e00..a2dbe27fa 100644 --- a/lisa/analysis/frequency.py +++ b/lisa/analysis/frequency.py @@ -77,7 +77,7 @@ class FrequencyAnalysis(AnalysisBase): time_df = cluster_freqs[["total_time", "frequency"]].groupby(["frequency"]).sum() # Compute ACTIVE Time - cluster_active = self._trace.analysis.cpus.signal_cluster_active(cpus) + cluster_active = self._trace.analysis.idle.signal_cluster_active(cpus) # In order to compute the active time spent at each frequency we # multiply 2 square waves: diff --git a/lisa/analysis/idle.py b/lisa/analysis/idle.py index 751cbc688..e7bc3332b 100644 --- a/lisa/analysis/idle.py +++ b/lisa/analysis/idle.py @@ -15,15 +15,18 @@ # limitations under the License. 
# -""" Idle Analysis Module """ +from functools import reduce +import operator import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt import pandas as pd import pylab as pl -from lisa.analysis.base import AnalysisBase -from trappy.utils import listify +from trappy.utils import listify, handle_duplicate_index + +from lisa.utils import memoized +from lisa.analysis.base import AnalysisBase, requires_events class IdleAnalysis(AnalysisBase): @@ -43,6 +46,95 @@ class IdleAnalysis(AnalysisBase): # DataFrame Getter Methods ############################################################################### + @memoized + @requires_events(['cpu_idle']) + def signal_cpu_active(self, cpu): + """ + Build a square wave representing the active (i.e. non-idle) CPU time + + :param cpu: CPU ID + :type cpu: int + + :returns: A :class:`pandas.Series` that equals 1 at timestamps where the + CPU is reported to be non-idle, 0 otherwise + """ + idle_df = self._trace.df_events('cpu_idle') + cpu_df = idle_df[idle_df.cpu_id == cpu] + + cpu_active = cpu_df.state.apply( + lambda s: 1 if s == -1 else 0 + ) + + start_time = 0.0 + if not self._trace.ftrace.normalized_time: + start_time = self._trace.ftrace.basetime + + if cpu_active.empty: + cpu_active = pd.Series([0], index=[start_time]) + elif cpu_active.index[0] != start_time: + entry_0 = pd.Series(cpu_active.iloc[0] ^ 1, index=[start_time]) + cpu_active = pd.concat([entry_0, cpu_active]) + + # Fix sequences of wakeup/sleep events reported with the same index + return handle_duplicate_index(cpu_active) + + @requires_events(signal_cpu_active.required_events) + def signal_cluster_active(self, cluster): + """ + Build a square wave representing the active (i.e. non-idle) cluster time + + :param cluster: list of CPU IDs belonging to a cluster + :type cluster: list(int) + + :returns: A :class:`pandas.Series` that equals 1 at timestamps where at + least one CPU is reported to be non-idle, 0 otherwise + """ + active = self.signal_cpu_active(cluster[0]).to_frame(name=cluster[0]) + for cpu in cluster[1:]: + active = active.join( + self.signal_cpu_active(cpu).to_frame(name=cpu), + how='outer' + ) + + active.fillna(method='ffill', inplace=True) + # There might be NaNs in the signal where we got data from some CPUs + # before others. That will break the .astype(int) below, so drop rows + # with NaN in them. + active.dropna(inplace=True) + + # Cluster active is the OR between the actives on each CPU + # belonging to that specific cluster + cluster_active = reduce( + operator.or_, + [cpu_active.astype(int) for _, cpu_active in + active.items()] + ) + + return cluster_active + + @requires_events(['cpu_idle']) + def df_cpus_wakeups(self): + """" + Get a DataFrame showing when CPUs have woken from idle + + :param cpus: List of CPUs to find wakeups for. If None, all CPUs. + :type cpus: list(int) or None + + :returns: A :class:`pandas.DataFrame` with + + * A ``cpu`` column (the CPU that woke up at the row index) + """ + cpus = list(range(self._trace.cpus_count)) + + sr = pd.Series() + for cpu in cpus: + cpu_sr = self._trace.getCPUActiveSignal(cpu) + cpu_sr = cpu_sr[cpu_sr == 1] + cpu_sr = cpu_sr.replace(1, cpu) + sr = sr.append(cpu_sr) + + return pd.DataFrame({'cpu': sr}).sort_index() + def df_cpu_idle_state_residency(self, cpu): """ Compute time spent by a given CPU in each idle state. 
-- 
GitLab


From 886ae211fa8434a3d116249434b6d0a63f45c0dd Mon Sep 17 00:00:00 2001
From: Valentin Schneider 
Date: Tue, 20 Nov 2018 19:00:03 +0000
Subject: [PATCH 30/56] analysis: Rework idle analysis

---
 lisa/analysis/idle.py | 255 ++++++++++++++++--------------------------
 1 file changed, 94 insertions(+), 161 deletions(-)

diff --git a/lisa/analysis/idle.py b/lisa/analysis/idle.py
index e7bc3332b..722e751b8 100644
--- a/lisa/analysis/idle.py
+++ b/lisa/analysis/idle.py
@@ -135,24 +135,23 @@ class IdleAnalysis(AnalysisBase):

         return pd.DataFrame({'cpu': sr}).sort_index()

+    @requires_events(["cpu_idle"])
     def df_cpu_idle_state_residency(self, cpu):
         """
         Compute time spent by a given CPU in each idle state.

-        :param entity: CPU ID
-        :type entity: int
+        :param cpu: CPU ID
+        :type cpu: int

-        :returns: :mod:`pandas.DataFrame` - idle state residency dataframe
-        """
+        :returns: a :class:`pandas.DataFrame` with:
+          * Idle states as index
+          * A ``time`` column (The time spent in the idle state)
+        """
         idle_df = self._trace.df_events('cpu_idle')
         cpu_idle = idle_df[idle_df.cpu_id == cpu]

-        cpu_is_idle = self._trace.getCPUActiveSignal(cpu) ^ 1
+        cpu_is_idle = self.signal_cpu_active(cpu) ^ 1

         # In order to compute the time spent in each idle state we
         # multiply 2 square waves:
@@ -185,35 +184,26 @@ class IdleAnalysis(AnalysisBase):
         idle_time_df.index.name = 'idle_state'
         return idle_time_df

+    @requires_events(['cpu_idle'])
     def df_cluster_idle_state_residency(self, cluster):
         """
         Compute time spent by a given cluster in each idle state.

-        :param cluster: cluster name or list of CPU IDs
-        :type cluster: str or list(int)
+        :param cluster: list of CPU IDs
+        :type cluster: list(int)

-        :returns: :mod:`pandas.DataFrame` - idle state residency dataframe
-        """
-
-        _cluster = cluster
-        if isinstance(cluster, str) or isinstance(cluster, str):
-            try:
-                _cluster = self._trace.plat_info['clusters'][cluster.lower()]
-            except KeyError:
-                self.get_logger().warning('%s cluster not found!', cluster)
-                return None
+        :returns: a :class:`pandas.DataFrame` with:
+          * Idle states as index
+          * A ``time`` column (The time spent in the idle state)
+        """
         idle_df = self._trace.df_events('cpu_idle')

         # Each core in a cluster can be in a different idle state, but the
         # cluster lies in the idle state with lowest ID, that is the shallowest
         # idle state among the idle states of its CPUs
-        cl_idle = idle_df[idle_df.cpu_id == _cluster[0]].state.to_frame(
-            name=_cluster[0])
-        for cpu in _cluster[1:]:
+        cl_idle = idle_df[idle_df.cpu_id == cluster[0]].state.to_frame(
+            name=cluster[0])
+        for cpu in cluster[1:]:
             cl_idle = cl_idle.join(
                 idle_df[idle_df.cpu_id == cpu].state.to_frame(name=cpu),
                 how='outer'
             )
@@ -225,9 +215,9 @@
         # cl_is_idle[t] == 1 if all CPUs in the cluster are reported
         # to be idle by cpufreq at time t
         # cl_is_idle[t] == 0 otherwise
-        cl_is_idle = self._trace.getClusterActiveSignal(_cluster) ^ 1
+        cl_is_idle = self.signal_cluster_active(cluster) ^ 1

-        # In order to compute the time spent in each idle statefrequency we
+        # In order to compute the time spent in each idle state we
         # multiply 2 square waves:
         # - cluster_is_idle
         # - idle_state, square wave of the
form:
@@ -257,161 +247,104 @@
 # Plotting Methods
###############################################################################

-    def plot_cpu_idle_state_residency(self, cpus=None, pct=False):
+    @requires_events(df_cpu_idle_state_residency.required_events)
+    def plot_cpu_idle_state_residency(self, cpu, filepath=None, pct=False):
         """
-        Plot per-CPU idle state residency. big CPUs are plotted first and then
-        LITTLEs.
+        Plot the idle state residency of a CPU

-        Requires cpu_idle trace events.
+        :param cpu: The CPU
+        :type cpu: int
+
+        :param pct: Plot residencies in percentage
+        :type pct: bool
+        """
+        fig, axis = self.setup_plot()
+
+        df = self.df_cpu_idle_state_residency(cpu)

-        :param cpus: list of CPU IDs. By default plot all CPUs
-        :type cpus: list(int) or int
+        self._plot_idle_state_residency(df, axis, pct)

-        :param pct: plot residencies in percentage
+        axis.set_title("CPU{} idle state residency".format(cpu))
+
+        self.save_plot(fig, filepath)
+
+        return axis
+
+    @requires_events(df_cluster_idle_state_residency.required_events)
+    def plot_cluster_idle_state_residency(self, cluster, filepath=None,
+                                          pct=False, axis=None):
+        """
+        Plot the idle state residency of a cluster
+
+        :param cluster: The cluster
+        :type cluster: list(int)
+
+        :param pct: Plot residencies in percentage
         :type pct: bool
+
+        :param axis: If specified, the axis to use for plotting
+        :type axis: matplotlib.axes.Axes
         """
-        if not self._trace.hasEvents('cpu_idle'):
-            self.get_logger().warning('Events [cpu_idle] not found, '
-                                      'plot DISABLED!')
-            return
-
-        if cpus is None:
-            # Generate plots only for available CPUs
-            cpuidle_data = self._trace.df_events('cpu_idle')
-            _cpus = list(range(cpuidle_data.cpu_id.max() + 1))
-        else:
-            _cpus = listify(cpus)
+        local_fig = axis is None
+
+        if local_fig:
+            fig, axis = self.setup_plot()

-        # Split between big and LITTLE CPUs ordered from higher to lower ID
-        _cpus.reverse()
-        big_cpus = [c for c in _cpus if c in self._big_cpus]
-        little_cpus = [c for c in _cpus if c in self._little_cpus]
-        _cpus = big_cpus + little_cpus
+        df = self.df_cluster_idle_state_residency(cluster)

-        residencies = []
-        xmax = 0.0
-        for cpu in _cpus:
-            r = self.df_cpu_idle_state_residency(cpu)
-            residencies.append(ResidencyData('CPU{}'.format(cpu), r))
+        self._plot_idle_state_residency(df, axis, pct)

-            max_time = r.max().values[0]
-            if xmax < max_time:
-                xmax = max_time
+        axis.set_title("CPUs {} idle state residency".format(cluster))

-        self._plot_idle_state_residency(residencies, 'cpu', xmax, pct=pct)
+        if local_fig:
+            self.save_plot(fig, filepath)

-    def plot_cluster_idle_state_residency(self, clusters=None, pct=False):
+        return axis
+
+    @requires_events(plot_cluster_idle_state_residency.required_events)
+    def plot_clusters_idle_state_residency(self, filepath=None, pct=False):
         """
-        Plot per-cluster idle state residency in a given cluster, i.e. the
-        amount of time cluster `cluster` spent in idle state `i`. By default,
-        both 'big' and 'LITTLE' clusters data are plotted.
-
-        Requires cpu_idle following trace events.
-        :param clusters: name of the clusters to be plotted (all of them by
-            default)
-        :type clusters: str ot list(str)
+        Plot the idle state residency of all clusters
+
+        :param pct: Plot residencies in percentage
+        :type pct: bool
+
+        .. note:: This assumes clusters == frequency domains, which may
+           not hold true...
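+           (the cluster list used here comes from ``plat_info['freq-domains']``).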
""" - if not self._trace.hasEvents('cpu_idle'): - self.get_logger().warning('Events [cpu_idle] not found, plot DISABLED!') - return - if 'clusters' not in self._trace.plat_info: - self.get_logger().warning('No platform cluster info. Plot DISABLED!') - return - - # Sanitize clusters - if clusters is None: - _clusters = list(self._trace.plat_info['clusters'].keys()) - else: - _clusters = listify(clusters) + clusters = self._trace.plat_info['freq-domains'] + + fig, axes = self.setup_plot(nrows=len(clusters), sharex=True) - # Precompute residencies for each cluster - residencies = [] - xmax = 0.0 - for c in _clusters: - r = self.df_cluster_idle_state_residency(c.lower()) - residencies.append(ResidencyData('{} Cluster'.format(c), r)) + for idx, cluster in enumerate(clusters): + axis = axes[idx] + self.cycle_colors(axis, idx) - max_time = r.max().values[0] - if xmax < max_time: - xmax = max_time + self.plot_cluster_idle_state_residency(cluster, pct=pct, axis=axis) - self._plot_idle_state_residency(residencies, 'cluster', xmax, pct=pct) + self.save_plot(fig, filepath) + + return axes ############################################################################### # Utility Methods ############################################################################### - def _plot_idle_state_residency(self, residencies, entity_name, xmax, - pct=False): + def _plot_idle_state_residency(self, df, axis, pct): """ - Generate Idle state residency plots for the given entities. - - :param residencies: list of residencies to be plot - :type residencies: list(namedtuple(ResidencyData)) - each tuple - contains: - - a label to be used as subplot title - - a dataframe with residency for each idle state + A convenient helper to plot idle state residency + """ + if pct: + df = df * 100 / df.sum() - :param entity_name: name of the entity ('cpu' or 'cluster') used in the - figure name - :type entity_name: str + df["time"].plot.barh(ax=axis, color=self.get_next_color(axis)) - :param xmax: upper bound of x-axes - :type xmax: double + if pct: + axis.set_xlabel("Time share (%)") + else: + axis.set_xlabel("Time (s)") - :param pct: plot residencies in percentage - :type pct: bool - """ - n_plots = len(residencies) - gs = gridspec.GridSpec(n_plots, 1) - fig = plt.figure() - - for idx, data in enumerate(residencies): - r = data.residency - if r is None: - plt.close(fig) - return - - axes = fig.add_subplot(gs[idx]) - is_first = idx == 0 - is_last = idx+1 == n_plots - yrange = 0.4 * max(6, len(r)) * n_plots - if pct: - duration = r.time.sum() - r_pct = r.apply(lambda x: x*100/duration) - r_pct.columns = [data.label] - r_pct.T.plot.barh(ax=axes, stacked=True, figsize=(16, yrange)) - - axes.legend(loc='lower center', ncol=7) - axes.set_xlim(0, 100) - else: - r.plot.barh(ax=axes, color='g', - legend=False, figsize=(16, yrange)) - - axes.set_xlim(0, 1.05*xmax) - axes.set_ylabel('Idle State') - axes.set_title(data.label) - - axes.grid(True) - if is_last: - if pct: - axes.set_xlabel('Residency [%]') - else: - axes.set_xlabel('Time [s]') - else: - axes.set_xticklabels([]) - - if is_first: - legend_y = axes.get_ylim()[1] - axes.annotate('Idle State Residency Time', xy=(0, legend_y), - xytext=(-50, 45), textcoords='offset points', - fontsize=18) - - figname = '{}/{}{}_idle_state_residency.png'\ - .format(self._trace.plots_dir, - self._trace.plots_prefix, - entity_name) - - pl.savefig(figname, bbox_inches='tight') + axis.set_ylabel("Idle state") + axis.grid(True) # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 -- GitLab From 
f0e0e883a844ca37f28834dbb145dd3a9bfe2849 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 20 Nov 2018 15:58:00 +0000 Subject: [PATCH 31/56] doc/conf: Use official matplotlib URL for intersphinx --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index e2faab9be..f36ea58eb 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -330,7 +330,7 @@ texinfo_documents = [ intersphinx_mapping = { 'python' : ('https://docs.python.org/3', None), 'pandas' : ('https://pandas.pydata.org/pandas-docs/stable/', None), - 'matplotlib' : ('http://matplotlib.sourceforge.net/', None), + 'matplotlib' : ('https://matplotlib.org', None), # XXX: Doesn't seem to work, might be due to how devlib doc is generated 'devlib' : ('https://pythonhosted.org/devlib/', None), 'trappy' : ('https://pythonhosted.org/TRAPpy', None), -- GitLab From 300bec9bf23247edcf3f8d28c712aadb58f812d3 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 20 Nov 2018 15:59:04 +0000 Subject: [PATCH 32/56] doc/conf: Rename test method autodoc callback --- doc/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index f36ea58eb..a8a2b2fc8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -372,7 +372,7 @@ def is_test(method): for cls in base_cls_list ) -def autodoc_process_docstring(app, what, name, obj, options, lines): +def autodoc_process_test_method(app, what, name, obj, options, lines): # Append the list of available test methods for all classes that appear to # have some. if what == 'class': @@ -391,6 +391,6 @@ def autodoc_process_docstring(app, what, name, obj, options, lines): lines.extend(test_list_doc.splitlines()) def setup(app): - app.connect('autodoc-process-docstring', autodoc_process_docstring) + app.connect('autodoc-process-docstring', autodoc_process_test_method) # vim :set tabstop=4 shiftwidth=4 textwidth=80 expandtab -- GitLab From c7c0616c0be34513067ab6d818725d42ffc2a51c Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 20 Nov 2018 15:59:48 +0000 Subject: [PATCH 33/56] doc/conf: Document events tagged by analysis.base.requires_events --- doc/conf.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index a8a2b2fc8..fac1a1c3b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -390,7 +390,20 @@ def autodoc_process_test_method(app, what, name, obj, options, lines): lines.extend(test_list_doc.splitlines()) +def autodoc_process_analysis_events(app, what, name, obj, options, lines): + # Append the list of required trace events + if what != 'method' or not hasattr(obj, "required_events"): + return + + events = obj.required_events + + events_doc = "\n:Required trace events:\n\n{}\n\n".format( + "\n".join([" * ``{}``".format(event) for event in events])) + + lines.extend(events_doc.splitlines()) + def setup(app): app.connect('autodoc-process-docstring', autodoc_process_test_method) + app.connect('autodoc-process-docstring', autodoc_process_analysis_events) # vim :set tabstop=4 shiftwidth=4 textwidth=80 expandtab -- GitLab From 75e7646a0d6b6f1f0d01a00373463aae416ee666 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 20 Nov 2018 19:02:32 +0000 Subject: [PATCH 34/56] doc: Add analysis --- doc/analysis.rst | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ doc/index.rst | 1 + 2 files changed, 51 insertions(+) create mode 100644 doc/analysis.rst diff --git a/doc/analysis.rst b/doc/analysis.rst new file mode 100644 index 000000000..f4f0b3aa2 --- /dev/null +++ 
b/doc/analysis.rst
@@ -0,0 +1,50 @@
+*********************
+Kernel trace analysis
+*********************
+
+Base class
+==========
+
+.. automodule:: lisa.analysis.base
+    :members:
+
+Load tracking
+=============
+
+.. automodule:: lisa.analysis.load_tracking
+    :members:
+
+CPUs
+====
+
+.. automodule:: lisa.analysis.cpus
+    :members:
+
+Frequency
+=========
+
+.. automodule:: lisa.analysis.frequency
+    :members:
+
+Tasks
+=====
+
+.. These two autoclasses should not be necessary, but sphinx doesn't seem
+   to like Enums and refuses to do anything with TaskState unless explicitly
+   told to.
+
+.. autoclass:: lisa.analysis.tasks.StateInt
+    :members:
+
+.. autoclass:: lisa.analysis.tasks.TaskState
+    :members:
+
+.. automodule:: lisa.analysis.tasks
+    :members:
+    :exclude-members: StateInt, TaskState
+
+Idle
+====
+
+.. automodule:: lisa.analysis.idle
+    :members:
diff --git a/doc/index.rst b/doc/index.rst
index d76b9d617..c3d0203fb 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -34,6 +34,7 @@ Contents:
    kernel_tests
    wlgen
    internals
+   analysis
    lisa_tests

-- 
GitLab


From 097087ddc85f2d698146002ca88ea395cc76905e Mon Sep 17 00:00:00 2001
From: Valentin Schneider 
Date: Tue, 27 Nov 2018 15:01:03 +0000
Subject: [PATCH 35/56] analysis/load_tracking: docstring cleanup

---
 lisa/analysis/load_tracking.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py
index cd7062dde..fa98ede4b 100644
--- a/lisa/analysis/load_tracking.py
+++ b/lisa/analysis/load_tracking.py
@@ -17,8 +17,6 @@

 """ Scheduler load tracking analysis module """

-import matplotlib.pyplot as plt
-import pylab as pl
 import pandas as pd

 from lisa.analysis.base import AnalysisBase
@@ -29,7 +27,7 @@ class LoadTrackingAnalysis(AnalysisBase):
     Support for scheduler load tracking analysis

     :param trace: input Trace object
-    :type trace: :class:`Trace`
+    :type trace: lisa.trace.Trace
     """

     name = 'load_tracking'
-- 
GitLab


From d7f5913e019fa86db18df2e5cbfc47f5f18be57a Mon Sep 17 00:00:00 2001
From: Valentin Schneider 
Date: Tue, 27 Nov 2018 15:00:46 +0000
Subject: [PATCH 36/56] analysis/cpus: Rename plot_context_switch to plot_context_switches

---
 lisa/analysis/cpus.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lisa/analysis/cpus.py b/lisa/analysis/cpus.py
index 8f1598250..474730630 100644
--- a/lisa/analysis/cpus.py
+++ b/lisa/analysis/cpus.py
@@ -63,7 +63,7 @@ class CpusAnalysis(AnalysisBase):
 ###############################################################################

     @requires_events(df_context_switches.required_events)
-    def plot_context_switch(self, filepath=None):
+    def plot_context_switches(self, filepath=None):
         """
         Plot histogram of context switches on each CPU.
         """
-- 
GitLab


From 18c60dfd5fe71320b817d59dd2b8c2be2d4c9d3f Mon Sep 17 00:00:00 2001
From: Valentin Schneider 
Date: Wed, 28 Nov 2018 19:35:36 +0000
Subject: [PATCH 37/56] analysis: Rework latency analysis

---
 lisa/analysis/latency.py | 920 +++++++++++++--------------------------
 1 file changed, 295 insertions(+), 625 deletions(-)

diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py
index 240e7d1e3..3c201c036 100644
--- a/lisa/analysis/latency.py
+++ b/lisa/analysis/latency.py
@@ -15,26 +15,22 @@
 # limitations under the License.
# -""" Latency Analysis Module """ +import re +import os import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt import numpy as np import pandas as pd import pylab as pl -import re -import os -from collections import namedtuple -from lisa.analysis.base import AnalysisBase -from lisa.utils import memoized from trappy.utils import listify from devlib.target import KernelVersion -# Tuple representing all IDs data of a Task -TaskData = namedtuple('TaskData', ['pid', 'names', 'label']) - -CDF = namedtuple('CDF', ['df', 'threshold', 'above', 'below']) +from collections import namedtuple +from lisa.analysis.base import AnalysisBase, requires_events, COLOR_CYCLES +from lisa.analysis.tasks import TaskState, TasksAnalysis +from lisa.utils import memoized class LatencyAnalysis(AnalysisBase): """ @@ -46,6 +42,9 @@ class LatencyAnalysis(AnalysisBase): name = 'latency' + LATENCY_THRESHOLD_ZONE_COLOR=COLOR_CYCLES[2] + LATENCY_THRESHOLD_COLOR=COLOR_CYCLES[3] + def __init__(self, trace): super(LatencyAnalysis, self).__init__(trace) @@ -53,704 +52,375 @@ class LatencyAnalysis(AnalysisBase): # DataFrame Getter Methods ############################################################################### - # Select Wakeup latency + @requires_events(TasksAnalysis.df_task_states.required_events) def df_latency_wakeup(self, task): """ - DataFrame of task's wakeup latencies + DataFrame of a task's wakeup latencies - The returned DataFrame index is the time, in seconds, `task` waken-up. - The DataFrame has just one column: - - wakeup_latency: the time the task waited before getting a CPU - - :param task: the task to report wakeup latencies for + :param task: The task's name or PID :type task: int or str + + :returns: a :class:`pandas.DataFrame` with: + + * A ``wakeup_latency`` column (the wakeup latency at that timestamp). """ - task_latency_df = self.df_latency(task) - if task_latency_df is None: - return None - df = task_latency_df[ - (task_latency_df.curr_state == 'W') & - (task_latency_df.next_state == 'A')][['t_delta']] - df.rename(columns={'t_delta' : 'wakeup_latency'}, inplace=True) + df = self._trace.analysis.tasks.df_task_states(task) + + df = df[(df.curr_state == TaskState.TASK_WAKING.char) & + (df.next_state == TaskState.TASK_ACTIVE.char)][["delta"]] + + df.rename(columns={'delta' : 'wakeup_latency'}, inplace=True) return df - # Select Wakeup latency + @requires_events(TasksAnalysis.df_task_states.required_events) def df_latency_preemption(self, task): """ - DataFrame of task's preemption latencies + DataFrame of a task's preemption latencies - The returned DataFrame index is the time, in seconds, `task` has been - preempted. - The DataFrame has just one column: - - preemption_latency: the time the task waited before getting again a CPU - - :param task: the task to report wakeup latencies for + :param task: The task's name or PID :type task: int or str + + :returns: a :class:`pandas.DataFrame` with: + + * A ``preempt_latency`` column (the preemption latency at that timestamp). 
""" - task_latency_df = self.df_latency(task) - if task_latency_df is None: - return None - df = task_latency_df[ - (task_latency_df.curr_state.isin([0, 'R', 'R+'])) & - (task_latency_df.next_state == 'A')][['t_delta']] - df.rename(columns={'t_delta' : 'preempt_latency'}, inplace=True) + df = self._trace.analysis.tasks.df_task_states(task) + + df = df[(df.curr_state.str.contains(TaskState.TASK_RUNNING.char)) & + (df.next_state == TaskState.TASK_ACTIVE.char)][["delta"]] + + df.rename(columns={'delta' : 'preempt_latency'}, inplace=True) return df - @memoized + @requires_events(TasksAnalysis.df_task_states.required_events) def df_activations(self, task): """ - DataFrame of task's wakeup intrvals + DataFrame of a task's activations - The returned DataFrame index is the time, in seconds, `task` has - waken-up. - The DataFrame has just one column: - - activation_interval: the time since the previous wakeup events - - :param task: the task to report runtimes for + :param task: The task's name or PID :type task: int or str + + :returns: a :class:`pandas.DataFrame` with: + + * An ``activation_interval`` column (the time since the last activation). """ - # Select all wakeup events - wkp_df = self.df_latency(task) - wkp_df = wkp_df[wkp_df.curr_state == 'W'].copy() - # Compute delta between successive wakeup events - wkp_df['activation_interval'] = ( - wkp_df['t_start'].shift(-1) - wkp_df['t_start']) - wkp_df['activation_interval'] = wkp_df['activation_interval'].shift(1) - # Return the activation period each time the task wakeups - wkp_df = wkp_df[['activation_interval']].shift(-1) - return wkp_df - - @memoized + wkp_df = self._trace.analysis.tasks.df_task_states(task) + wkp_df = wkp_df[wkp_df.curr_state == TaskState.TASK_WAKING.char] + + index = wkp_df.index.to_frame() + wkp_df['activation_interval'] = (index.shift(-1) - index).shift(1) + + return wkp_df[["activation_interval"]] + + @requires_events(TasksAnalysis.df_task_states.required_events) def df_runtimes(self, task): """ DataFrame of task's runtime each time the task blocks - The returned DataFrame index is the time, in seconds, `task` completed - an activation (i.e. sleep or exit) - The DataFrame has just one column: - - running_time: the time the task spent RUNNING since its last wakeup - - :param task: the task to report runtimes for + :param task: The task's name or PID :type task: int or str + + :returns: a :class:`pandas.DataFrame` with: + + * The times where the task stopped running as an index + * A ``curr_state`` column (the current task state, see + :class:`lisa.analysis.tasks.TaskState`) + * A ``running_time`` column (the cumulated running time since the + last activation). """ - # Select all wakeup events - run_df = self.df_latency(task) - - # Filter function to add up RUNNING intervals of each activation - def cr(row): - if row['curr_state'] in ['S']: - return cr.runtime - if row['curr_state'] in ['W']: - if cr.spurious_wkp: - cr.runtime += row['t_delta'] - cr.spurious_wkp = False - return cr.runtime - cr.runtime = 0 - return cr.runtime - if row['curr_state'] != 'A': - return cr.runtime - if row['next_state'] in ['R', 'R+', 'S', 'x', 'D']: - cr.runtime += row['t_delta'] - return cr.runtime - # This is required to capture strange trace sequences where - # a switch_in event is follower by a wakeup_event. - # This sequence is not expected, but we found it in some traces. - # Possible reasons could be: - # - misplaced sched_wakeup events - # - trace buffer artifacts - # TO BE BETTER investigated in kernel space. 
-            # For the time being, we account this interval as RUNNING time,
-            # which is what kernelshark does.
-            if row['next_state'] in ['W']:
-                cr.runtime += row['t_delta']
-                cr.spurious_wkp = True
-                return cr.runtime
-            if row['next_state'] in ['n']:
-                return cr.runtime
-            self.get_logger().warning("Unexpected next state: %s @ %f",
-                                      row['next_state'], row['t_start'])
-            return 0
-        # cr's static variables intialization
-        cr.runtime = 0
-        cr.spurious_wkp = False
-
-        # Add up RUNNING intervals of each activation
-        run_df['running_time'] = run_df.apply(cr, axis=1)
-        # Return RUNTIME computed for each activation,
-        # each time the task blocks or terminate
-        run_df = run_df[run_df.next_state.isin(['S', 'x'])][['running_time']]
-        return run_df
-
-    @memoized
-    def _get_latency_df(self, task, kind='all', threshold_ms=1):
+        df = self._trace.analysis.tasks.df_task_states(task)
+
+        runtimes = []
+        spurious_wkp = False
+
+        # Using df.apply() is risky for counting (can be called more than once
+        # on the same row), so use a loop instead
+        for index, row in df.iterrows():
+            runtime = runtimes[-1] if len(runtimes) else 0
+
+            if row.curr_state == TaskState.TASK_WAKING.char:
+                # This is required to capture strange trace sequences where
+                # a switch_in event is followed by a wakeup_event.
+                # This sequence is not expected, but we found it in some traces.
+                # Possible reasons could be:
+                # - misplaced sched_wakeup events
+                # - trace buffer artifacts
+                # TO BE BETTER investigated in kernel space.
+                # For the time being, we account this interval as RUNNING time,
+                # which is what kernelshark does.
+                if spurious_wkp:
+                    runtime += row.delta
+                    spurious_wkp = False
+                else:
+                    # This is a new activation, reset the runtime counter
+                    runtime = 0
+
+            elif row.curr_state == TaskState.TASK_ACTIVE.char:
+                # This is the spurious wakeup thing mentioned above
+                if row.next_state == TaskState.TASK_WAKING.char:
+                    spurious_wkp = True
+
+                runtime += row.delta
+
+            runtimes.append(runtime)
+
+        df["running_time"] = runtimes
+
+        # The runtime column is not entirely correct - at a task's first
+        # TASK_ACTIVE occurrence, the running_time will be non-zero, even
+        # though the task has not run yet. However, it's much simpler to
+        # accumulate the running_time the way we do and shift it later.
+        df.running_time = df.running_time.shift(1)
+        df.running_time = df.running_time.fillna(0)
+
+        return df[~df.curr_state.isin([
+            TaskState.TASK_ACTIVE.char,
+            TaskState.TASK_WAKING.char
+        ])][["curr_state", "running_time"]]
+
###############################################################################
# Plotting Methods
###############################################################################

+    @requires_events(df_latency_wakeup.required_events)
+    def plot_latencies(self, task, wakeup=True, preempt=True, threshold_ms=1,
+                       filepath=None):
         """
-        Compute statistics on latencies of the specified task.
+        Plot the latencies of a task over time
+
+        :param task: The task's name or PID
+        :type task: int or str

-        :param task: the task to report latencies for
-        :type task: int or list(str)
+        :param wakeup: Whether to plot wakeup latencies
+        :type wakeup: bool

-        :param kind: the kind of latencies to report (WAKEUP and/or PREEMPT")
-        :type kind: str
+        :param preempt: Whether to plot preemption latencies
+        :type preempt: bool

-        :param threshold_ms: the minimum acceptable [ms] value to report
-                             graphically in the generated plots
+        :param threshold_ms: The latency threshold to plot
         :type threshold_ms: int or float
+        """
+        fig, axis = self.setup_plot()
+
+        axis.axhline(threshold_ms / 1e3, linestyle='--', color=self.LATENCY_THRESHOLD_COLOR,
+                     label="{}ms threshold".format(threshold_ms))
+
+        if wakeup:
+            df = self.df_latency_wakeup(task)
+            if df.empty:
+                self.get_logger().warning("No data to plot for wakeups")
+            else:
+                df.plot(ax=axis, style='+', label="Wakeup")

-        :returns: a DataFrame with statistics on task latencies
+        if preempt:
+            df = self.df_latency_preemption(task)
+            if df.empty:
+                self.get_logger().warning("No data to plot for preemption")
+            else:
+                df.plot(ax=axis, style='+', label="Preemption")
+
+
+        axis.set_title("Latencies of task \"{}\"".format(task))
+        axis.set_ylabel("Latency (s)")
+        axis.legend()
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
+
+        self.save_plot(fig, filepath)
+        return axis
+
+    def _get_cdf(self, data, threshold):
         """
-        # Get latency events
-        df, cdf = self._get_latency_df(task, kind, threshold_ms)
+        Build the "Cumulative Distribution Function" (CDF) for the given data
         """
-        if not self._trace.hasEvents('sched_switch'):
-            self.get_logger().warning('Event [sched_switch] not found, '
-                                      'plot DISABLED!')
-            return
-        if not self._trace.hasEvents('sched_wakeup'):
-            self.get_logger().warning('Event [sched_wakeup] not found, '
-                                      'plot DISABLED!')
-            return
-
-        # Get task data
-        td = self._get_task_data(task)
-        if not td:
-            return None
-
-        # Load wakeup latencies (if required)
+        # Build the series of sorted values
+        ser = data.sort_values()
+        if len(ser) < 1000:
+            # Unbias the CDF for small populations
+            # https://stackoverflow.com/a/31971245/5096023
+            ser = ser.append(pd.Series(ser.iloc[-1]))
+
+        df = pd.Series(np.linspace(0., 1., len(ser)), index=ser)
+
+        # Compute percentage of samples above/below the specified threshold
+        below = float(max(df[:threshold]))
+        above = 1 - below
+        return df, above, below
+
+    @requires_events(df_latency_wakeup.required_events)
+    def _get_latencies_df(self, task, wakeup, preempt):
         wkp_df = None
-        if 'all' in kind or 'wakeup' in kind:
-            wkp_df = self.df_latency_wakeup(td.pid)
-        if wkp_df is not None:
+        prt_df = None
+
+        if wakeup:
+            wkp_df = self.df_latency_wakeup(task)
             wkp_df.rename(columns={'wakeup_latency' : 'latency'}, inplace=True)
-            self.get_logger().info('Found: %5d WAKEUP latencies', len(wkp_df))

-        # Load preempt latencies (if required)
-        prt_df = None
-        if 'all' in kind or 'preempt' in kind:
-            prt_df = self.df_latency_preemption(td.pid)
-        if prt_df is not None:
+        if preempt:
+            prt_df = self.df_latency_preemption(task)
             prt_df.rename(columns={'preempt_latency' : 'latency'}, inplace=True)
-            self.get_logger().info('Found: %5d PREEMPT latencies', len(prt_df))
-
-        if wkp_df is None and prt_df is None:
-            self.get_logger().warning('No Latency info for task [%s]', td.label)
-            return

-        # Join the two data frames
-        df = wkp_df.append(prt_df)
-        cdf = self._get_cdf(df.latency, (threshold_ms / 1000.))
+        if wakeup and preempt:
+            df = wkp_df.append(prt_df)
+        else:
+            # Exactly one of wkp_df/prt_df is set here; a bare "wkp_df or
+            # prt_df" would trip over DataFrame truthiness, so pick explicitly
+            df = wkp_df if wkp_df is not None else prt_df

-        return df, cdf
+        return df

-
@memoized - def df_latency_stats(self, task, kind='all', threshold_ms=1): + @requires_events(_get_latencies_df.required_events) + def plot_latencies_cdf(self, task, wakeup=True, preempt=True, + threshold_ms=1, filepath=None): """ - Compute statistics on latencies of the specified task. + Plot the latencies Cumulative Distribution Function of a task - :param task: the task to report latencies for - :type task: int or list(str) + :param task: The task's name or PID + :type task: int or str - :param kind: the kind of latencies to report (WAKEUP and/or PREEMPT") - :type kind: str + :param wakeup: Whether to plot wakeup latencies + :type wakeup: bool - :param threshold_ms: the minimum acceptable [ms] value to report - graphically in the generated plots - :type threshold_ms: int or float + :param preempt: Whether to plot preemption latencies + :type preempt: bool - :returns: a DataFrame with statistics on task latencies + :param threshold_ms: The latency threshold to plot + :type threshold_ms: int or float """ - # Get latency events - df, cdf = self._get_latency_df(task, kind, threshold_ms) + fig, axis = self.setup_plot() - # Return statistics - stats_df = df.describe(percentiles=[0.95, 0.99]) - label = '{:.1f}%'.format(100. * cdf.below) - stats = { label : cdf.threshold } - return stats_df.append(pd.DataFrame( - list(stats.values()), columns=['latency'], index=list(stats.keys()))) + df = self._get_latencies_df(task, wakeup, preempt) + threshold_s = threshold_ms / 1e3 + cdf_df, above, below = self._get_cdf(df.latency, threshold_s) + cdf_df.plot(ax=axis, xlim=(0, None), label="CDF") + axis.axhline(below, linestyle='--', color=self.LATENCY_THRESHOLD_COLOR, + label="Latencies below {}ms".format(threshold_ms)) + axis.axvspan(0, threshold_s, facecolor=self.LATENCY_THRESHOLD_ZONE_COLOR, + alpha=0.5, label="{}ms threshold zone".format(threshold_ms)); -############################################################################### -# Plotting Methods -############################################################################### + axis.set_title("Latencies CDF of task \"{}\"".format(task)) + axis.set_xlabel("Latency (s)") + axis.set_ylabel("Latencies below the x value (%)") + axis.legend() - def plot_latency(self, task, kind='all', tag=None, threshold_ms=1, bins=64): + self.save_plot(fig, filepath) + return axis + + @requires_events(_get_latencies_df.required_events) + def plot_latencies_histogram(self, task, wakeup=True, preempt=True, + threshold_ms=1, bins=64, filepath=None): """ - Generate a set of plots to report the WAKEUP and PREEMPT latencies the - specified task has been subject to. A WAKEUP latencies is the time from - when a task becomes RUNNABLE till the first time it gets a CPU. - A PREEMPT latencies is the time from when a RUNNING task is suspended - because of the CPU is assigned to another task till when the task - enters the CPU again. 
+        Plot the latencies histogram of a task
 
-        :param task: the task to report latencies for
-        :type task: int or list(str)
+        :param task: The task's name or PID
+        :type task: int or str
 
-        :param kind: the kind of latencies to report (WAKEUP and/or PREEMPT")
-        :type kind: str
+        :param wakeup: Whether to plot wakeup latencies
+        :type wakeup: bool
 
-        :param tag: a string to add to the plot title
-        :type tag: str
+        :param preempt: Whether to plot preemption latencies
+        :type preempt: bool
 
-        :param threshold_ms: the minimum acceptable [ms] value to report
-                             graphically in the generated plots
+        :param threshold_ms: The latency threshold to plot
         :type threshold_ms: int or float
+        """
+        fig, axis = self.setup_plot()
 
-        :param bins: number of bins to be used for the runtime's histogram
-        :type bins: int
+        df = self._get_latencies_df(task, wakeup, preempt)
+        threshold_s = threshold_ms / 1e3
 
-        :returns: a DataFrame with statistics on ploted latencies
+        df.latency.plot.hist(bins=bins, ax=axis, xlim=(0, 1.1 * df.latency.max()))
+        axis.axvspan(0, threshold_s, facecolor=self.LATENCY_THRESHOLD_ZONE_COLOR, alpha=0.5,
+                     label="{}ms threshold zone".format(threshold_ms))
+
+        axis.set_title("Latencies histogram of task \"{}\"".format(task))
+        axis.set_xlabel("Latency (s)")
+        axis.legend()
+
+        self.save_plot(fig, filepath)
+        return axis
+
+    @requires_events(df_latency_wakeup.required_events)
+    def plot_latency_bands(self, task, filepath=None, axis=None):
         """
+        Draw the task wakeup/preemption latencies as colored bands
 
-        # Get latency events
-        df, cdf = self._get_latency_df(task, kind, threshold_ms)
-        self.get_logger().info('Total: %5d latency events', len(df))
-        self.get_logger().info('%.1f %% samples below %d [ms] threshold',
-                               100. * cdf.below, threshold_ms)
-
-        # Get task data
-        td = self._get_task_data(task)
-        if not td:
-            return None
-
-        # Setup plots
-        gs = gridspec.GridSpec(2, 2, height_ratios=[2,1], width_ratios=[1,1])
-        plt.figure(figsize=(16, 8))
-
-        plot_title = "[{}]: {} latencies".format(td.label, kind.upper())
-        if tag:
-            plot_title = "{} [{}]".format(plot_title, tag)
-        plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms)
-
-        # Latency events duration over time
-        axes = plt.subplot(gs[0,0:2])
-        axes.set_title(plot_title)
-        try:
-            wkp_df.rename(columns={'latency': 'wakeup'}, inplace=True)
-            wkp_df.plot(style='b+', logy=True, ax=axes)
-        except Exception:
-            pass
-        try:
-            prt_df.rename(columns={'latency' : 'preempt'}, inplace=True)
-            prt_df.plot(style='r+', logy=True, ax=axes)
-        except Exception:
-            pass
-        axes.axhline(threshold_ms / 1000., linestyle='--', color='g')
-        self._trace.analysis.status.plot_overutilized(axes)
-        axes.legend(loc='lower center', ncol=2)
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-
-        # Cumulative distribution of latencies samples
-        axes = plt.subplot(gs[1,0])
-        cdf.df.plot(ax=axes, legend=False, xlim=(0,None),
-                    title='Latencies CDF ({:.1f}% within {} [ms] threshold)'\
-                    .format(100. * cdf.below, threshold_ms))
-        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5);
-        axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--')
-
-        # Histogram of all latencies
-        axes = plt.subplot(gs[1,1])
-        ymax = 1.1 * df.latency.max()
-        df.latency.plot(kind='hist', bins=bins, ax=axes,
-                        xlim=(0,ymax), legend=False,
-                        title='Latency histogram ({} bins, {} [ms] green threshold)'\
-                        .format(bins, threshold_ms));
-        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5);
-
-        # Save generated plots into datadir
-        task_name = re.sub('[\ :/]', '_', td.label)
-        figname = '{}/{}task_latencies_{}_{}.png'\
-                  .format(self._trace.plots_dir, self._trace.plots_prefix,
-                          td.pid, task_name)
-        pl.savefig(figname, bbox_inches='tight')
-
-
-    def plot_latency_bands(self, task, axes=None):
+        :param task: The task's name or PID
+        :type task: int or str
+
+        :param axis: If provided, overlay the bands on this axis
+        :type axis: matplotlib.axes.Axes
         """
-        Draw a plot that shows intervals of time when the execution of a
-        RUNNABLE task has been delayed. The plot reports:
-         WAKEUP lantecies as RED colored bands
-         PREEMPTION lantecies as BLUE colored bands
+        local_fig = axis is None
 
-        The optional axes parameter allows to plot the signal on an existing
-        graph.
+        if local_fig:
+            fig, axis = self.setup_plot()
 
-        :param task: the task to report latencies for
-        :type task: str
+        wkl_df = self.df_latency_wakeup(task)
+        prt_df = self.df_latency_preemption(task)
+
+        def plot_bands(df, column, label):
+            bands = [(t, df[column][t]) for t in df.index]
+            color = self.get_next_color(axis)
+            for idx, (start, duration) in enumerate(bands):
+                if idx > 0:
+                    label = None
 
-        :param axes: axes on which to plot the signal
-        :type axes: :mod:`matplotlib.axes.Axes`
-        """
-        if not self._trace.hasEvents('sched_switch'):
-            self.get_logger().warning('Event [sched_switch] not found, '
-                                      'plot DISABLED!')
-            return
-        if not self._trace.hasEvents('sched_wakeup'):
-            self.get_logger().warning('Event [sched_wakeup] not found, '
-                                      'plot DISABLED!')
-            return
-
-        # Get task PID
-        td = self._get_task_data(task)
-        if not td:
-            return None
-
-        wkl_df = self.df_latency_wakeup(td.pid)
-        prt_df = self.df_latency_preemption(td.pid)
-
-        if wkl_df is None and prt_df is None:
-            self.get_logger().warning('No task with name [%s]', td.label)
-            return
-
-        # If not axis provided: generate a standalone plot
-        if not axes:
-            gs = gridspec.GridSpec(1, 1)
-            plt.figure(figsize=(16, 2))
-            axes = plt.subplot(gs[0, 0])
-            axes.set_title('Latencies on [{}] '
-                           '(red: WAKEUP, blue: PREEMPT)'\
-                           .format(td.label))
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        axes.set_yticklabels([])
-        axes.set_xlabel('Time [s]')
-        axes.grid(True)
-
-        # Draw WAKEUP latencies
-        try:
-            bands = [(t, wkl_df['wakeup_latency'][t]) for t in wkl_df.index]
-            for (start, duration) in bands:
-                end = start + duration
-                axes.axvspan(start, end, facecolor='r', alpha=0.1)
-                axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        except Exception:
-            pass
-
-        # Draw PREEMPTION latencies
-        try:
-            bands = [(t, prt_df['preempt_latency'][t]) for t in prt_df.index]
-            for (start, duration) in bands:
                 end = start + duration
-                axes.axvspan(start, end, facecolor='b', alpha=0.1)
-                axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        except Exception:
-            pass
+                axis.axvspan(start, end, facecolor=color, alpha=0.5,
+                             label=label)
 
-    def plot_activations(self, task, tag=None, threshold_ms=16, bins=64):
-        """
-        Plots "activation intervals" for the specified task
-
-        An "activation interval" is time incurring between two consecutive
-        wakeups of a task. A set of plots is generated to report:
-        - Activations interval at wakeup time: every time a task wakeups a
-          point is plotted to represent the time interval since the previous
-          wakeup.
-        - Activations interval cumulative function: reports the cumulative
-          function of the activation intervals.
-        - Activations intervals histogram: reports a 64 bins histogram of
-          the activation intervals.
-
-        All plots are parameterized based on the value of threshold_ms, which
-        can be used to filter activations intervals bigger than 2 times this
-        value.
-        Such a threshold is useful to filter out from the plots outliers thus
-        focusing the analysis in the most critical periodicity under analysis.
-        The number and percentage of discarded samples is reported in output.
-        A default threshold of 16 [ms] is used, which is useful for example
-        to analyze a 60Hz rendering pipelines.
-
-        A PNG of the generated plots is generated and saved in the same folder
-        where the trace is.
-
-        :param task: the task to report latencies for
-        :type task: int or list(str)
-
-        :param tag: a string to add to the plot title
-        :type tag: str
-
-        :param threshold_ms: the minimum acceptable [ms] value to report
-                             graphically in the generated plots
-        :type threshold_ms: int or float
+        plot_bands(wkl_df, "wakeup_latency", "Wakeup latencies")
+        plot_bands(prt_df, "preempt_latency", "Preemption latencies")
+        axis.legend()
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
-        :param bins: number of bins to be used for the runtime's histogram
-        :type bins: int
+        if local_fig:
+            self.save_plot(fig, filepath)
 
-        :returns: a DataFrame with statistics on ploted activation intervals
-        """
+        return axis
 
-        if not self._trace.hasEvents('sched_switch'):
-            self.get_logger().warning('Event [sched_switch] not found, '
-                                      'plot DISABLED!')
-            return
-        if not self._trace.hasEvents('sched_wakeup'):
-            self.get_logger().warning('Event [sched_wakeup] not found, '
-                                      'plot DISABLED!')
-            return
-
-        # Get task data
-        td = self._get_task_data(task)
-        if not td:
-            return None
-
-        # Load activation data
-        wkp_df = self.df_activations(td.pid)
-        if wkp_df is None:
-            return None
-        self.get_logger().info('Found: %5d activations for [%s]',
-                               len(wkp_df), td.label)
-
-        # Disregard data above two time the specified threshold
-        y_max = (2 * threshold_ms) / 1000.
-        len_tot = len(wkp_df)
-        wkp_df = wkp_df[wkp_df.activation_interval <= y_max]
-        len_plt = len(wkp_df)
-        if len_plt < len_tot:
-            len_dif = len_tot - len_plt
-            len_pct = 100. * len_dif / len_tot
-            self.get_logger().warning('Discarding {} activation intervals (above 2 x threshold_ms, '
-                                      '{:.1f}% of the overall activations)'\
-                                      .format(len_dif, len_pct))
-        ymax = 1.1 * wkp_df.activation_interval.max()
-
-        # Build the series for the CDF
-        cdf = self._get_cdf(wkp_df.activation_interval, (threshold_ms / 1000.))
-        self.get_logger().info('%.1f %% samples below %d [ms] threshold',
-                               100. * cdf.below, threshold_ms)
-
-        # Setup plots
-        gs = gridspec.GridSpec(2, 2, height_ratios=[2,1], width_ratios=[1,1])
-        plt.figure(figsize=(16, 8))
-
-        plot_title = "[{}]: activaton intervals (@ wakeup time)".format(td.label)
-        if tag:
-            plot_title = "{} [{}]".format(plot_title, tag)
-        plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms)
-
-        # Activations intervals over time
-        axes = plt.subplot(gs[0,0:2])
-        axes.set_title(plot_title)
-        wkp_df.plot(style='g+', logy=False, ax=axes)
-
-        axes.axhline(threshold_ms / 1000., linestyle='--', color='g')
-        self._trace.analysis.status.plot_overutilized(axes)
-        axes.legend(loc='lower center', ncol=2)
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-
-        # Cumulative distribution of all activations intervals
-        axes = plt.subplot(gs[1,0])
-        cdf.df.plot(ax=axes, legend=False, xlim=(0,None),
-                    title='Activations CDF ({:.1f}% within {} [ms] threshold)'\
-                    .format(100. * cdf.below, threshold_ms))
-        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5);
-        axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--')
-
-        # Histogram of all activations intervals
-        axes = plt.subplot(gs[1,1])
-        wkp_df.plot(kind='hist', bins=bins, ax=axes,
-                    xlim=(0,ymax), legend=False,
-                    title='Activation intervals histogram ({} bins, {} [ms] green threshold)'\
-                    .format(bins, threshold_ms));
-        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5);
-
-        # Save generated plots into datadir
-        task_name = re.sub('[\ :/]', '_', td.label)
-        figname = '{}/{}task_activations_{}_{}.png'\
-                  .format(self._trace.plots_dir, self._trace.plots_prefix,
-                          td.pid, task_name)
-        pl.savefig(figname, bbox_inches='tight')
-
-        # Return statistics
-        stats_df = wkp_df.describe(percentiles=[0.95, 0.99])
-        label = '{:.1f}%'.format(100. * cdf.below)
-        stats = { label : cdf.threshold }
-        return stats_df.append(pd.DataFrame(
-            list(stats.values()), columns=['activation_interval'], index=list(stats.keys())))
-
-
-    def plot_runtimes(self, task, tag=None, threshold_ms=8, bins=64):
+    @requires_events(df_activations.required_events)
+    def plot_activations(self, task, filepath=None):
         """
-        Plots "running times" for the specified task
-
-        A "running time" is the sum of all the time intervals a task executed
-        in between a wakeup and the next sleep (or exit).
-        A set of plots is generated to report:
-        - Running times at block time: every time a task blocks a
-          point is plotted to represent the cumulative time the task has be
-          running since its last wakeup
-        - Running time cumulative function: reports the cumulative
-          function of the running times.
-        - Running times histogram: reports a 64 bins histogram of
-          the running times.
-
-        All plots are parameterized based on the value of threshold_ms, which
-        can be used to filter running times bigger than 2 times this value.
-        Such a threshold is useful to filter out from the plots outliers thus
-        focusing the analysis in the most critical periodicity under analysis.
-        The number and percentage of discarded samples is reported in output.
-        A default threshold of 16 [ms] is used, which is useful for example to
-        analyze a 60Hz rendering pipelines.
-
-        A PNG of the generated plots is generated and saved in the same folder
-        where the trace is.
-
-        :param task: the task to report latencies for
-        :type task: int or list(str)
-
-        :param tag: a string to add to the plot title
-        :type tag: str
-
-        :param threshold_ms: the minimum acceptable [ms] value to report
-                             graphically in the generated plots
-        :type threshold_ms: int or float
-
-        :param bins: number of bins to be used for the runtime's histogram
-        :type bins: int
+        Plot the :meth:`lisa.analysis.latency.LatencyAnalysis.df_runtimes` of a task
 
-        :returns: a DataFrame with statistics on ploted running times
+        :param task: The task's name or PID
+        :type task: int or str
         """
+        fig, axis = self.setup_plot()
 
-        if not self._trace.hasEvents('sched_switch'):
-            self.get_logger().warning('Event [sched_switch] not found, '
-                                      'plot DISABLED!')
-            return
-        if not self._trace.hasEvents('sched_wakeup'):
-            self.get_logger().warning('Event [sched_wakeup] not found, '
-                                      'plot DISABLED!')
-            return
-
-        # Get task data
-        td = self._get_task_data(task)
-        if not td:
-            return None
-
-        # Load runtime data
-        run_df = self.df_runtimes(td.pid)
-        if run_df is None:
-            return None
-        self.get_logger().info('Found: %5d activations for [%s]',
-                               len(run_df), td.label)
-
-        # Disregard data above two time the specified threshold
-        y_max = (2 * threshold_ms) / 1000.
-        len_tot = len(run_df)
-        run_df = run_df[run_df.running_time <= y_max]
-        len_plt = len(run_df)
-        if len_plt < len_tot:
-            len_dif = len_tot - len_plt
-            len_pct = 100. * len_dif / len_tot
-            self.get_logger().warning('Discarding {} running times (above 2 x threshold_ms, '
-                                      '{:.1f}% of the overall activations)'\
-                                      .format(len_dif, len_pct))
-        ymax = 1.1 * run_df.running_time.max()
-
-        # Build the series for the CDF
-        cdf = self._get_cdf(run_df.running_time, (threshold_ms / 1000.))
-        self.get_logger().info('%.1f %% samples below %d [ms] threshold',
-                               100. * cdf.below, threshold_ms)
-
-        # Setup plots
-        gs = gridspec.GridSpec(2, 2, height_ratios=[2,1], width_ratios=[1,1])
-        plt.figure(figsize=(16, 8))
-
-        plot_title = "[{}]: running times (@ block time)".format(td.label)
-        if tag:
-            plot_title = "{} [{}]".format(plot_title, tag)
-        plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms)
-
-        # Running time over time
-        axes = plt.subplot(gs[0,0:2])
-        axes.set_title(plot_title)
-        run_df.plot(style='g+', logy=False, ax=axes)
-
-        axes.axhline(threshold_ms / 1000., linestyle='--', color='g')
-        self._trace.analysis.status.plot_overutilized(axes)
-        axes.legend(loc='lower center', ncol=2)
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-
-        # Cumulative distribution of all running times
-        axes = plt.subplot(gs[1,0])
-        cdf.df.plot(ax=axes, legend=False, xlim=(0,None),
-                    title='Runtime CDF ({:.1f}% within {} [ms] threshold)'\
-                    .format(100. * cdf.below, threshold_ms))
-        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5);
-        axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--')
-
-        # Histogram of all running times
-        axes = plt.subplot(gs[1,1])
-        run_df.plot(kind='hist', bins=bins, ax=axes,
-                    xlim=(0,ymax), legend=False,
-                    title='Latency histogram ({} bins, {} [ms] green threshold)'\
-                    .format(bins, threshold_ms));
-        axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5);
-
-        # Save generated plots into datadir
-        task_name = re.sub('[\ :/]', '_', td.label)
-        figname = '{}/{}task_runtimes_{}_{}.png'\
-                  .format(self._trace.plots_dir, self._trace.plots_prefix,
-                          td.pid, task_name)
-        pl.savefig(figname, bbox_inches='tight')
-
-        # Return statistics
-        stats_df = run_df.describe(percentiles=[0.95, 0.99])
-        label = '{:.1f}%'.format(100. * cdf.below)
-        stats = { label : cdf.threshold }
-        return stats_df.append(pd.DataFrame(
-            list(stats.values()), columns=['running_time'], index=list(stats.keys())))
-
-###############################################################################
-# Utility Methods
-###############################################################################
+        wkp_df = self.df_activations(task)
 
-    @memoized
-    def _get_task_data(self, task):
-
-        # Get task PID
-        if isinstance(task, str):
-            task_pids = self._trace.get_task_by_name(task)
-            if len(task_pids) == 0:
-                self.get_logger().warning('No tasks found with name [%s]', task)
-                return None
-
-            task_pid = task_pids[0]
-            if len(task_pids) > 1:
-                self.get_logger().warning('Multiple PIDs for task named [%s]', task)
-                for pid in task_pids:
-                    self.get_logger().warning('  %5d :  %s', pid,
-                                              ','.join(self._trace.get_task_by_pid(pid)))
-                self.get_logger().warning('Returning stats only for PID: %d',
-                                          task_pid)
-            task_name = self._trace.get_task_by_pid(task_pid)
-
-        # Get task name
-        elif isinstance(task, int):
-            task_pid = task
-            task_name = self._trace.get_task_by_pid(task_pid)
-            if task_name is None:
-                self.get_logger().warning('No tasks found with name [%s]', task)
-                return None
+        wkp_df.plot(style='+', logy=False, ax=axis)
 
-        else:
-            raise ValueError("Task must be either an int or str")
+        axis.set_title("Activation intervals of task \"{}\"".format(task))
+        self._trace.analysis.status.plot_overutilized(axis)
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
-        task_label = "{}: {}".format(task_pid, task_name)
-        return TaskData(task_pid, task_name, task_label)
+        self.save_plot(fig, filepath)
+        return axis
 
-    def _get_cdf(self, data, threshold):
+    @requires_events(df_runtimes.required_events)
+    def plot_runtimes(self, task, filepath=None):
         """
-        Build the "Cumulative Distribution Function" (CDF) for the given data
+        Plot the :meth:`lisa.analysis.latency.LatencyAnalysis.df_runtimes` of a task
+
+        :param task: The task's name or PID
+        :type task: int or str
         """
+        fig, axis = self.setup_plot()
 
-        # Build the series of sorted values
-        ser = data.sort_values()
-        if len(ser) < 1000:
-            # Append again the last (and largest) value.
-            # This step is important especially for small sample sizes
-            # in order to get an unbiased CDF
-            ser = ser.append(pd.Series(ser.iloc[-1]))
-        df = pd.Series(np.linspace(0., 1., len(ser)), index=ser)
+        df = self.df_runtimes(task)
 
-        # Compute percentage of samples above/below the specified threshold
-        below = float(max(df[:threshold]))
-        above = 1 - below
-        return CDF(df, threshold, above, below)
+        df.plot(style='+', ax=axis)
+
+        axis.set_title("Per-activation runtimes of task \"{}\"".format(task))
+        self._trace.analysis.status.plot_overutilized(axis)
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
+        self.save_plot(fig, filepath)
+        return axis
 
 # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80
--
GitLab


From 34f8883fe475a6a41718782fd16df5700b41a3e6 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Thu, 29 Nov 2018 17:45:35 +0000
Subject: [PATCH 38/56] doc/analysis: Add latency analysis

---
 doc/analysis.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/analysis.rst b/doc/analysis.rst
index f4f0b3aa2..1760aa733 100644
--- a/doc/analysis.rst
+++ b/doc/analysis.rst
@@ -48,3 +48,9 @@ Idle
 
 .. automodule:: lisa.analysis.idle
    :members:
+
+Latency
+=======
+
+.. automodule:: lisa.analysis.latency
+   :members:
--
GitLab


From 88afb535780c56cb1ea4a1479855885cf0245a52 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Thu, 29 Nov 2018 17:44:58 +0000
Subject: [PATCH 39/56] analysis/tasks: Use sched_wakeup_new in df_task_states()

---
 lisa/analysis/tasks.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lisa/analysis/tasks.py b/lisa/analysis/tasks.py
index 3e40eb12e..5c602cdf7 100644
--- a/lisa/analysis/tasks.py
+++ b/lisa/analysis/tasks.py
@@ -220,6 +220,10 @@ class TasksAnalysis(AnalysisBase):
         wk_df = self._trace.df_events('sched_wakeup')
         sw_df = self._trace.df_events('sched_switch')
 
+        if "sched_wakeup_new" in self._trace.events:
+            wkn_df = self._trace.df_events('sched_wakeup_new')
+            wk_df = pd.concat([wk_df, wkn_df]).sort_index()
+
         task_wakeup = wk_df[wk_df.pid == pid][['target_cpu', '__cpu']]
         task_wakeup['curr_state'] = TaskState.TASK_WAKING.char
--
GitLab


From aa63bc20dddc797b5ccaa3cfe394dc3370210399 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Thu, 29 Nov 2018 19:04:24 +0000
Subject: [PATCH 40/56] analysis: Uniformize colour coding

Make similar signals have the same colour regardless of the
CPU/cluster they belong to.
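The aim is that a signal always takes its colour from the axis it is
drawn on rather than from hardcoded per-CPU styles. A minimal sketch of
how such a per-axis colour helper can be built on top of matplotlib's
property cycle is below; it relies on matplotlib's private _get_lines
cycler, and the actual helpers in lisa/analysis/base.py may be
implemented differently:

    import matplotlib.pyplot as plt

    def get_next_color(axis):
        # Take the next colour from this axis' own colour cycle, so each
        # new artist (line, band, threshold marker) drawn on the axis
        # gets a distinct yet deterministic colour.
        return axis._get_lines.get_next_color()

    # Example: give an average line the next colour in the cycle
    fig, axis = plt.subplots()
    axis.plot([0, 1], [0, 1], label="signal")
    axis.axhline(0.5, color=get_next_color(axis), linestyle='--',
                 label="average")
    axis.legend()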
---
 lisa/analysis/frequency.py     | 14 +++++---------
 lisa/analysis/idle.py          |  3 +--
 lisa/analysis/load_tracking.py |  1 -
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py
index a2dbe27fa..52d4ad295 100644
--- a/lisa/analysis/frequency.py
+++ b/lisa/analysis/frequency.py
@@ -320,7 +320,8 @@ class FrequencyAnalysis(AnalysisBase):
                 ax=axis, drawstyle='steps-post')
 
         if avg > 0:
-            axis.axhline(avg, color='r', linestyle='--', label="average")
+            axis.axhline(avg, color=self.get_next_color(axis), linestyle='--',
+                         label="average")
 
         axis.set_ylim(frequencies[0] * 0.9, frequencies[-1] * 1.1)
         axis.set_xlim(self._trace.x_min, self._trace.x_max)
@@ -353,7 +354,6 @@ class FrequencyAnalysis(AnalysisBase):
 
         for idx, domain in enumerate(domains):
             axis = axes[idx] if len(domains) > 1 else axes
-            self.cycle_colors(axis, idx)
 
             self.plot_cpu_frequencies(domain[0], filepath, axis)
 
             axis.set_title('Frequencies of CPUS {}'.format(domain))
@@ -391,10 +391,10 @@
         total_df = total_df * 100 / total_df.sum()
             active_df = active_df * 100 / active_df.sum()
 
-        total_df.plot.barh(ax=axes[0], color=self.get_next_color(axes[0]))
+        total_df.plot.barh(ax=axes[0])
         axes[0].set_title("CPU{} total frequency residency".format(cpu))
 
-        active_df.plot.barh(ax=axes[1], color=self.get_next_color(axes[0]))
+        active_df.plot.barh(ax=axes[1])
         axes[1].set_title("CPU{} active frequency residency".format(cpu))
 
         for axis in axes:
@@ -425,9 +425,6 @@
         for idx, domain in enumerate(domains):
             local_axes = axes[2 * idx : 2 * (idx + 1)]
 
-            for ax in local_axes:
-                self.cycle_colors(ax, 2 * idx)
-
             self.plot_cpu_frequency_residency(domain[0], filepath, pct, local_axes)
             for axis in local_axes:
                 title = axis.get_title()
@@ -458,7 +455,7 @@
         if pct:
             df = df * 100 / df.sum()
 
-        df["transitions"].plot.barh(ax=axis, color=self.get_next_color(axis))
+        df["transitions"].plot.barh(ax=axis)
 
         axis.set_title('Frequency transitions of CPU{}'.format(cpu))
 
@@ -489,7 +486,6 @@
         for idx, domain in enumerate(domains):
             axis = axes[idx]
-            self.cycle_colors(axis, idx)
 
             self.plot_cpu_frequency_transitions(domain[0], filepath, pct, axis)

diff --git a/lisa/analysis/idle.py b/lisa/analysis/idle.py
index 722e751b8..d24059681 100644
--- a/lisa/analysis/idle.py
+++ b/lisa/analysis/idle.py
@@ -318,7 +318,6 @@ class IdleAnalysis(AnalysisBase):
 
         for idx, cluster in enumerate(clusters):
             axis = axes[idx]
-            self.cycle_colors(axis, idx)
 
             self.plot_cluster_idle_state_residency(cluster, pct=pct, axis=axis)
 
@@ -337,7 +336,7 @@
         if pct:
             df = df * 100 / df.sum()
 
-        df["time"].plot.barh(ax=axis, color=self.get_next_color(axis))
+        df["time"].plot.barh(ax=axis)
 
         if pct:
             axis.set_xlabel("Time share (%)")

diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py
index fa98ede4b..37b0ae45a 100644
--- a/lisa/analysis/load_tracking.py
+++ b/lisa/analysis/load_tracking.py
@@ -183,7 +183,6 @@ class LoadTrackingAnalysis(AnalysisBase):
 
         for idx, cpu in enumerate(cpus):
             axis = axes[cpu] if len(cpus) > 1 else axes
-            self.cycle_colors(axis, 2 * idx)
 
             # Add CPU utilization
             axis.set_title('CPU{}'.format(cpu))
--
GitLab


From dd1963a768e05e035054c4e3fb1aba193a81d41a Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Thu, 29 Nov 2018 19:06:57 +0000
Subject: [PATCH 41/56] analysis: Remove EAS analysis

Suggested-by: Patrick Bellasi
---
 lisa/analysis/eas.py | 403 -------------------------------------------
 1 file changed, 403 deletions(-)
 delete mode 100644 lisa/analysis/eas.py

diff --git a/lisa/analysis/eas.py b/lisa/analysis/eas.py
deleted file mode 100644
index f44e6e55a..000000000
--- a/lisa/analysis/eas.py
+++ /dev/null
@@ -1,403 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-#
-# Copyright (C) 2015, ARM Limited and contributors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-""" EAS-specific Analysis Module """
-
-import matplotlib.gridspec as gridspec
-import matplotlib.pyplot as plt
-import pylab as pl
-
-from lisa.analysis.base import AnalysisBase
-
-
-class EasAnalysis(AnalysisBase):
-    """
-    Support for EAS signals anaysis
-
-    :param trace: input Trace object
-    :type trace: :class:`trace.Trace`
-    """
-
-    name = 'eas'
-
-    def __init__(self, trace):
-        super(EasAnalysis, self).__init__(trace)
-
-###############################################################################
-# DataFrame Getter Methods
-###############################################################################
-
-
-###############################################################################
-# Plotting Methods
-###############################################################################
-
-    def plot_e_diff_time(self, tasks=None,
-                         min_usage_delta=None, max_usage_delta=None,
-                         min_cap_delta=None, max_cap_delta=None,
-                         min_nrg_delta=None, max_nrg_delta=None,
-                         min_nrg_diff=None, max_nrg_diff=None):
-        """
-        Plot energy_diff()-related signals on time axes.
-        """
-        if not self._trace.hasEvents('sched_energy_diff'):
-            self.get_logger().warning('Event [sched_energy_diff] not found, plot DISABLED!')
-            return
-        df = self._trace.df_events('sched_energy_diff')
-
-        # Filter on 'tasks'
-        if tasks is not None:
-            self.get_logger().info('Plotting EDiff data just for task(s) [%s]', tasks)
-            df = df[df['comm'].isin(tasks)]
-
-        # Filter on 'usage_delta'
-        if min_usage_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with minimum '
-                                   'usage_delta of [%d]', min_usage_delta)
-            df = df[abs(df['usage_delta']) >= min_usage_delta]
-        if max_usage_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with maximum '
-                                   'usage_delta of [%d]', max_usage_delta)
-            df = df[abs(df['usage_delta']) <= max_usage_delta]
-
-        # Filter on 'cap_delta'
-        if min_cap_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with minimum '
-                                   'cap_delta of [%d]', min_cap_delta)
-            df = df[abs(df['cap_delta']) >= min_cap_delta]
-        if max_cap_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with maximum '
-                                   'cap_delta of [%d]', max_cap_delta)
-            df = df[abs(df['cap_delta']) <= max_cap_delta]
-
-        # Filter on 'nrg_delta'
-        if min_nrg_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with minimum '
-                                   'nrg_delta of [%d]', min_nrg_delta)
-            df = df[abs(df['nrg_delta']) >= min_nrg_delta]
-        if max_nrg_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with maximum '
-                                   'nrg_delta of [%d]', max_nrg_delta)
-            df = df[abs(df['nrg_delta']) <= max_nrg_delta]
-
-        # Filter on 'nrg_diff'
-        if min_nrg_diff is not None:
-            self.get_logger().info('Plotting EDiff data just with minimum '
-                                   'nrg_diff of [%d]', min_nrg_diff)
-            df = df[abs(df['nrg_diff']) >= min_nrg_diff]
-        if max_nrg_diff is not None:
-            self.get_logger().info('Plotting EDiff data just with maximum '
-                                   'nrg_diff of [%d]', max_nrg_diff)
-            df = df[abs(df['nrg_diff']) <= max_nrg_diff]
-
-        # Grid: setup stats for gris
-        gs = gridspec.GridSpec(4, 3, height_ratios=[2, 4, 2, 4])
-        gs.update(wspace=0.1, hspace=0.1)
-
-        # Configure plot
-        fig = plt.figure(figsize=(16, 8*2+4*2+2))
-        plt.suptitle("EnergyDiff Data",
-                     y=.92, fontsize=16, horizontalalignment='center')
-
-        # Plot1: src and dst CPUs
-        axes = plt.subplot(gs[0, :])
-        axes.set_title('Source and Destination CPUs')
-        df[['src_cpu', 'dst_cpu']].plot(ax=axes, style=['bo', 'r+'])
-        axes.set_ylim(-1, self._trace.plat_info['cpus-count']+1)
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        axes.grid(True)
-        axes.set_xticklabels([])
-        axes.set_xlabel('')
-        self._trace.analysis.status.plot_overutilized(axes)
-
-        # Plot2: energy and capacity variations
-        axes = plt.subplot(gs[1, :])
-        axes.set_title('Energy vs Capacity Variations')
-
-        colors_labels = list(zip('gbyr', ['Optimal Accept', 'SchedTune Accept',
-                                          'SchedTune Reject', 'Suboptimal Reject']))
-        for color, label in colors_labels:
-            subset = df[df.nrg_payoff_group == label]
-            if len(subset) == 0:
-                continue
-            subset[['nrg_diff_pct']].plot(ax=axes, style=[color+'o'])
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        axes.set_yscale('symlog')
-        axes.grid(True)
-        axes.set_xticklabels([])
-        axes.set_xlabel('')
-        self._trace.analysis.status.plot_overutilized(axes)
-
-        # Plot3: energy payoff
-        axes = plt.subplot(gs[2, :])
-        axes.set_title('Energy Payoff Values')
-        for color, label in colors_labels:
-            subset = df[df.nrg_payoff_group == label]
-            if len(subset) == 0:
-                continue
-            subset[['nrg_payoff']].plot(ax=axes, style=[color+'o'])
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        axes.set_yscale('symlog')
-        axes.grid(True)
-        axes.set_xticklabels([])
-        axes.set_xlabel('')
-        self._trace.analysis.status.plot_overutilized(axes)
-
-        # Plot4: energy deltas (kernel and host computed values)
-        axes = plt.subplot(gs[3, :])
-        axes.set_title('Energy Deltas Values')
-        df[['nrg_delta', 'nrg_diff_pct']].plot(ax=axes, style=['ro', 'b+'])
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        axes.grid(True)
-        self._trace.analysis.status.plot_overutilized(axes)
-
-        # Save generated plots into datadir
-        figname = '{}/{}ediff_time.png'\
-            .format(self._trace.plots_dir, self._trace.plots_prefix)
-        pl.savefig(figname, bbox_inches='tight')
-
-        # Grid: setup stats for gris
-        gs = gridspec.GridSpec(1, 3, height_ratios=[2])
-        gs.update(wspace=0.1, hspace=0.1)
-
-        fig = plt.figure(figsize=(16, 4))
-
-        # Plot: usage, capacity and energy distributuions
-        axes = plt.subplot(gs[0, 0])
-        df[['usage_delta']].hist(ax=axes, bins=60)
-        axes = plt.subplot(gs[0, 1])
-        df[['cap_delta']].hist(ax=axes, bins=60)
-        axes = plt.subplot(gs[0, 2])
-        df[['nrg_delta']].hist(ax=axes, bins=60)
-
-        # Save generated plots into datadir
-        figname = '{}/{}ediff_stats.png'\
-            .format(self._trace.plots_dir, self._trace.plots_prefix)
-        pl.savefig(figname, bbox_inches='tight')
-
-    def plot_e_diff_space(self, tasks=None,
-                          min_usage_delta=None, max_usage_delta=None,
-                          min_cap_delta=None, max_cap_delta=None,
-                          min_nrg_delta=None, max_nrg_delta=None,
-                          min_nrg_diff=None, max_nrg_diff=None):
-        """
-        Plot energy_diff()-related signals on the Performance-Energy space
-        (PxE).
-        """
-        if not self._trace.hasEvents('sched_energy_diff'):
-            self.get_logger().warning('Event [sched_energy_diff] not found, plot DISABLED!')
-            return
-        df = self._trace.df_events('sched_energy_diff')
-
-        # Filter on 'tasks'
-        if tasks is not None:
-            self.get_logger().info('Plotting EDiff data just for task(s) [%s]', tasks)
-            df = df[df['comm'].isin(tasks)]
-
-        # Filter on 'usage_delta'
-        if min_usage_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with minimum '
-                                   'usage_delta of [%d]', min_usage_delta)
-            df = df[abs(df['usage_delta']) >= min_usage_delta]
-        if max_usage_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with maximum '
-                                   'usage_delta of [%d]', max_usage_delta)
-            df = df[abs(df['usage_delta']) <= max_usage_delta]
-
-        # Filter on 'cap_delta'
-        if min_cap_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with minimum '
-                                   'cap_delta of [%d]', min_cap_delta)
-            df = df[abs(df['cap_delta']) >= min_cap_delta]
-        if max_cap_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with maximum '
-                                   'cap_delta of [%d]', max_cap_delta)
-            df = df[abs(df['cap_delta']) <= max_cap_delta]
-
-        # Filter on 'nrg_delta'
-        if min_nrg_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with minimum '
-                                   'nrg_delta of [%d]', min_nrg_delta)
-            df = df[abs(df['nrg_delta']) >= min_nrg_delta]
-        if max_nrg_delta is not None:
-            self.get_logger().info('Plotting EDiff data just with maximum '
-                                   'nrg_delta of [%d]', max_nrg_delta)
-            df = df[abs(df['nrg_delta']) <= max_nrg_delta]
-
-        # Filter on 'nrg_diff'
-        if min_nrg_diff is not None:
-            self.get_logger().info('Plotting EDiff data just with minimum '
-                                   'nrg_diff of [%d]', min_nrg_diff)
-            df = df[abs(df['nrg_diff']) >= min_nrg_diff]
-        if max_nrg_diff is not None:
-            self.get_logger().info('Plotting EDiff data just with maximum '
-                                   'nrg_diff of [%d]', max_nrg_diff)
-            df = df[abs(df['nrg_diff']) <= max_nrg_diff]
-
-        # Grid: setup grid for P-E space
-        gs = gridspec.GridSpec(1, 2, height_ratios=[2])
-        gs.update(wspace=0.1, hspace=0.1)
-
-        fig = plt.figure(figsize=(16, 8))
-
-        # Get min-max of each axes
-        x_min = df.nrg_diff_pct.min()
-        x_max = df.nrg_diff_pct.max()
-        y_min = df.cap_delta.min()
-        y_max = df.cap_delta.max()
-        axes_min = min(x_min, y_min)
-        axes_max = max(x_max, y_max)
-
-        # # Tag columns by usage_delta
-        # ccol = df.usage_delta
-        # df['usage_delta_group'] = np.select(
-        #     [ccol < 150, ccol < 400, ccol < 600],
-        #     ['< 150', '< 400', '< 600'], '>= 600')
-        #
-        # # Tag columns by nrg_payoff
-        # ccol = df.nrg_payoff
-        # df['nrg_payoff_group'] = np.select(
-        #     [ccol > 2e9, ccol > 0, ccol > -2e9],
-        #     ['Optimal Accept', 'SchedTune Accept', 'SchedTune Reject'],
-        #     'Suboptimal Reject')
-
-        # Plot: per usage_delta values
-        axes = plt.subplot(gs[0, 0])
-
-        for color, label in zip('bgyr', ['< 150', '< 400', '< 600', '>= 600']):
-            subset = df[df.usage_delta_group == label]
-            if len(subset) == 0:
-                continue
-            plt.scatter(subset.nrg_diff_pct, subset.cap_delta,
-                        s=subset.usage_delta,
-                        c=color, label='task_usage ' + str(label),
-                        axes=axes)
-
-        # Plot space axes
-        plt.plot((0, 0), (-1025, 1025), 'y--', axes=axes)
-        plt.plot((-1025, 1025), (0, 0), 'y--', axes=axes)
-
-        # # Perf cuts
-        # plt.plot((0, 100), (0, 100*delta_pb), 'b--',
-        #          label='PB (Perf Boost)')
-        # plt.plot((0, -100), (0, -100*delta_pc), 'r--',
-        #          label='PC (Perf Constraint)')
-        #
-        # # Perf boost setups
-        # for y in range(0,6):
-        #     plt.plot((0, 500), (0,y*100), 'g:')
-        # for x in range(0,5):
-        #     plt.plot((0, x*100), (0,500), 'g:')
-
-        axes.legend(loc=4, borderpad=1)
-
-        plt.xlim(1.1*axes_min, 1.1*axes_max)
-        plt.ylim(1.1*axes_min, 1.1*axes_max)
-
-        # axes.title('Performance-Energy Space')
-        axes.set_xlabel('Energy diff [%]')
-        axes.set_ylabel('Capacity diff [%]')
-
-        # Plot: per usage_delta values
-        axes = plt.subplot(gs[0, 1])
-
-        colors_labels = list(zip('gbyr', ['Optimal Accept', 'SchedTune Accept',
-                                          'SchedTune Reject', 'Suboptimal Reject']))
-        for color, label in colors_labels:
-            subset = df[df.nrg_payoff_group == label]
-            if len(subset) == 0:
-                continue
-            plt.scatter(subset.nrg_diff_pct, subset.cap_delta,
-                        s=60,
-                        c=color,
-                        marker='+',
-                        label='{} Region'.format(label),
-                        axes=axes)
-                        # s=subset.usage_delta,
-
-        # Plot space axes
-        plt.plot((0, 0), (-1025, 1025), 'y--', axes=axes)
-        plt.plot((-1025, 1025), (0, 0), 'y--', axes=axes)
-
-        # # Perf cuts
-        # plt.plot((0, 100), (0, 100*delta_pb), 'b--',
-        #          label='PB (Perf Boost)')
-        # plt.plot((0, -100), (0, -100*delta_pc), 'r--',
-        #          label='PC (Perf Constraint)')
-        #
-        # # Perf boost setups
-        # for y in range(0,6):
-        #     plt.plot((0, 500), (0,y*100), 'g:')
-        # for x in range(0,5):
-        #     plt.plot((0, x*100), (0,500), 'g:')
-
-        axes.legend(loc=4, borderpad=1)
-
-        plt.xlim(1.1*axes_min, 1.1*axes_max)
-        plt.ylim(1.1*axes_min, 1.1*axes_max)
-
-        # axes.title('Performance-Energy Space')
-        axes.set_xlabel('Energy diff [%]')
-        axes.set_ylabel('Capacity diff [%]')
-
-        plt.title('Performance-Energy Space')
-
-        # Save generated plots into datadir
-        figname = '{}/{}ediff_space.png'\
-            .format(self._trace.plots_dir, self._trace.plots_prefix)
-        pl.savefig(figname, bbox_inches='tight')
-
-    def plot_sched_tune_conf(self):
-        """
-        Plot the configuration of SchedTune.
-        """
-        if not self._trace.hasEvents('sched_tune_config'):
-            self.get_logger().warning('Event [sched_tune_config] not found, plot DISABLED!')
-            return
-        # Grid
-        gs = gridspec.GridSpec(2, 1, height_ratios=[4, 1])
-        gs.update(wspace=0.1, hspace=0.1)
-
-        # Figure
-        plt.figure(figsize=(16, 2*6))
-        plt.suptitle("SchedTune Configuration",
-                     y=.97, fontsize=16, horizontalalignment='center')
-
-        # Plot: Margin
-        axes = plt.subplot(gs[0, 0])
-        axes.set_title('Margin')
-        data = self._trace.df_events('sched_tune_config')[['margin']]
-        data.plot(ax=axes, drawstyle='steps-post', style=['b'])
-        axes.set_ylim(0, 110)
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        axes.xaxis.set_visible(False)
-
-        # Plot: Boost mode
-        axes = plt.subplot(gs[1, 0])
-        axes.set_title('Boost mode')
-        data = self._trace.df_events('sched_tune_config')[['boostmode']]
-        data.plot(ax=axes, drawstyle='steps-post')
-        axes.set_ylim(0, 4)
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        axes.xaxis.set_visible(True)
-
-        # Save generated plots into datadir
-        figname = '{}/{}schedtune_conf.png'\
-            .format(self._trace.plots_dir, self._trace.plots_prefix)
-        pl.savefig(figname, bbox_inches='tight')
-
-# vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80
--
GitLab


From 581efa436b82a1d04bdc30ccb122e1e1d25e3576 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 13:43:51 +0000
Subject: [PATCH 42/56] analysis: Rework status analysis

---
 lisa/analysis/frequency.py     |  6 ++-
 lisa/analysis/latency.py       | 12 +++++-
 lisa/analysis/load_tracking.py | 14 +++++--
 lisa/analysis/status.py        | 67 ++++++++++++++++------------------
 lisa/analysis/tasks.py         |  6 ++-
 5 files changed, 60 insertions(+), 45 deletions(-)

diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py
index 52d4ad295..cb6cae7f6 100644
--- a/lisa/analysis/frequency.py
+++ b/lisa/analysis/frequency.py
@@ -323,6 +323,10 @@ class FrequencyAnalysis(AnalysisBase):
             axis.axhline(avg, color=self.get_next_color(axis), linestyle='--',
                          label="average")
 
+        plot_overutilized = self._trace.analysis.status.plot_overutilized
+        if self._trace.hasEvents(plot_overutilized.required_events):
+            plot_overutilized(axis=axis)
+
         axis.set_ylim(frequencies[0] * 0.9, frequencies[-1] * 1.1)
         axis.set_xlim(self._trace.x_min, self._trace.x_max)
@@ -333,8 +337,6 @@
         axis.grid(True)
         axis.legend()
 
-        self._trace.analysis.status.plot_overutilized(axis)
-
         if local_fig:
             self.save_plot(fig, filepath)

diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py
index 3c201c036..ecd8f037d 100644
--- a/lisa/analysis/latency.py
+++ b/lisa/analysis/latency.py
@@ -395,8 +395,12 @@ class LatencyAnalysis(AnalysisBase):
 
         wkp_df.plot(style='+', logy=False, ax=axis)
 
+        plot_overutilized = self._trace.analysis.status.plot_overutilized
+        if self._trace.hasEvents(plot_overutilized.required_events):
+            plot_overutilized(axis=axis)
+
         axis.set_title("Activation intervals of task \"{}\"".format(task))
-        self._trace.analysis.status.plot_overutilized(axis)
+
         axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
         self.save_plot(fig, filepath)
@@ -416,8 +420,12 @@
 
         df.plot(style='+', ax=axis)
 
+        plot_overutilized = self._trace.analysis.status.plot_overutilized
+        if self._trace.hasEvents(plot_overutilized.required_events):
+            plot_overutilized(axis=axis)
+
         axis.set_title("Per-activation runtimes of task \"{}\"".format(task))
-        self._trace.analysis.status.plot_overutilized(axis)
+
         axis.set_xlim(self._trace.x_min, self._trace.x_max)
         self.save_plot(fig, filepath)

diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py
index 37b0ae45a..229c1d5c4 100644
--- a/lisa/analysis/load_tracking.py
+++ b/lisa/analysis/load_tracking.py
@@ -203,7 +203,9 @@ class LoadTrackingAnalysis(AnalysisBase):
             drawstyle='steps-post')
 
         # Add overutilized signal to the plot
-        self._trace.analysis.status.plot_overutilized(axis)
+        plot_overutilized = self._trace.analysis.status.plot_overutilized
+        if self._trace.hasEvents(plot_overutilized.required_events):
+            plot_overutilized(axis=axis)
 
         axis.set_ylim(0, 1100)
         axis.set_xlim(self._trace.x_min, self._trace.x_max)
@@ -236,6 +238,10 @@
         df[['util']].plot(ax=axis, drawstyle='steps-post', alpha=0.4)
         df[['load']].plot(ax=axis, drawstyle='steps-post', alpha=0.4)
 
+        plot_overutilized = self._trace.analysis.status.plot_overutilized
+        if self._trace.hasEvents(plot_overutilized.required_events):
+            plot_overutilized(axis=axis)
+
         axis.set_title('Load-tracking signals of task "{}"'.format(task))
         axis.legend()
         axis.grid(True)
@@ -319,14 +325,16 @@
         for stat in df["placement"].unique():
             df[df.placement == stat]["__cpu"].plot(ax=axis, style="+", label=stat)
 
+        plot_overutilized = self._trace.analysis.status.plot_overutilized
+        if self._trace.hasEvents(plot_overutilized.required_events):
+            plot_overutilized(axis=axis)
+
         axis.set_title("Utilization vs placement of task \"{}\"".format(task))
 
         axis.set_xlim(self._trace.x_min, self._trace.x_max)
         axis.grid(True)
         axis.legend()
 
-        self._trace.analysis.status.plot_overutilized(axis)
-
         self.save_plot(fig, filepath)
 
         return axis

diff --git a/lisa/analysis/status.py b/lisa/analysis/status.py
index 95a2d594c..858b28e26 100644
--- a/lisa/analysis/status.py
+++ b/lisa/analysis/status.py
@@ -19,11 +19,7 @@
 
 """ System Status Analaysis Module """
 
-import matplotlib.gridspec as gridspec
-import matplotlib.pyplot as plt
-
-from lisa.analysis.base import AnalysisBase
-
+from lisa.analysis.base import AnalysisBase, requires_events
 
 class StatusAnalysis(AnalysisBase):
     """
@@ -43,13 +39,16 @@
 # DataFrame Getter Methods
###############################################################################
 
+    @requires_events(["sched_overutilized"])
     def df_overutilized(self):
         """
-        Get data frame with sched_overutilized data.
-        """
-        if not self._trace.hasEvents('sched_overutilized'):
-            return None
+        Get overutilized events
 
+        :returns: A :class:`pandas.DataFrame` with:
+
+          * An ``overutilized`` column (the overutilized status at a given time)
+          * A ``len`` column (the time spent in that overutilized status)
+        """
         # Build sequence of overutilization "bands"
         df = self._trace.df_events('sched_overutilized')
 
@@ -61,7 +60,6 @@
         # df = df.reset_index()\
         #        .drop_duplicates(subset='Time', keep='last')\
         #        .set_index('Time')
-
         return df[['len', 'overutilized']]
 
 
@@ -69,44 +67,41 @@
 # Plotting Methods
###############################################################################
 
-    def plot_overutilized(self, axes=None):
+    @requires_events(df_overutilized.required_events)
+    def plot_overutilized(self, filepath=None, axis=None):
         """
-        Draw a plot that shows intervals of time where the system was reported
-        as overutilized.
+        Draw the system's overutilized status as colored bands
 
-        The optional axes parameter allows to plot the signal on an existing
-        graph.
-
-        :param axes: axes on which to plot the signal
-        :type axes: :mod:`matplotlib.axes.Axes`
+        :param axis: If provided, overlay the bands on this axis
+        :type axis: matplotlib.axes.Axes
         """
-        if not self._trace.hasEvents('sched_overutilized'):
-            self.get_logger().warning('Event [sched_overutilized] not found, '
-                                      'plot DISABLED!')
-            return
+        local_fig = axis is None
+
+        if local_fig:
+            fig, axis = self.setup_plot()
 
         df = self.df_overutilized()
 
         # Compute intervals in which the system is reported to be overutilized
         bands = [(t, df['len'][t], df['overutilized'][t]) for t in df.index]
 
-        # If not axis provided: generate a standalone plot
-        if not axes:
-            gs = gridspec.GridSpec(1, 1)
-            plt.figure(figsize=(16, 1))
-            axes = plt.subplot(gs[0, 0])
-            axes.set_title('System Status {white: EAS mode, '
-                           'red: Non EAS mode}')
-            axes.set_xlim(self._trace.x_min, self._trace.x_max)
-            axes.set_yticklabels([])
-            axes.set_xlabel('Time [s]')
-            axes.grid(True)
-
-        # Otherwise: draw overutilized bands on top of the specified plot
+        color = self.get_next_color(axis)
+        label = "Overutilized"
         for (start, delta, overutilized) in bands:
             if not overutilized:
                 continue
+
             end = start + delta
-            axes.axvspan(start, end, facecolor='r', alpha=0.1)
+            axis.axvspan(start, end, alpha=0.2, facecolor=color, label=label)
+
+            if label:
+                label = None
+
+        axis.legend()
+
+        if local_fig:
+            axis.set_title("System-wide overutilized status")
+            axis.set_xlim(self._trace.x_min, self._trace.x_max)
+            self.save_plot(fig, filepath)
 
 # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80
diff --git a/lisa/analysis/tasks.py b/lisa/analysis/tasks.py
index 5c602cdf7..6aaf2644a 100644
--- a/lisa/analysis/tasks.py
+++ b/lisa/analysis/tasks.py
@@ -316,6 +316,10 @@ class TasksAnalysis(AnalysisBase):
         else:
             sw_df["__cpu"].plot(ax=axis, style='+')
 
+        plot_overutilized = self._trace.analysis.status.plot_overutilized
+        if self._trace.hasEvents(plot_overutilized.required_events):
+            plot_overutilized(axis=axis)
+
         # Add an extra CPU lane to make room for the legend
         ylabels = [''] + [str(n) for n in range(self._trace.plat_info['cpus-count'])]
         axis.set_yticklabels(ylabels)
@@ -325,8 +329,6 @@
         axis.grid(True)
         axis.legend()
 
-        self._trace.analysis.status.plot_overutilized(axis)
-
         self.save_plot(fig, filepath)
 
         return axis
--
GitLab


From 1f1a512d358e2bb8dca5a5fa9b6fa4af59ec2545 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 16:00:04 +0000
Subject: [PATCH 43/56] analysis/tasks: Fix plotting corner-case

---
 lisa/analysis/tasks.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/lisa/analysis/tasks.py b/lisa/analysis/tasks.py
index 6aaf2644a..3651dd389 100644
--- a/lisa/analysis/tasks.py
+++ b/lisa/analysis/tasks.py
@@ -311,8 +311,14 @@ class TasksAnalysis(AnalysisBase):
         if "freq-domains" in self._trace.plat_info:
             # If we are aware of frequency domains, use one color per domain
             for domain in self._trace.plat_info["freq-domains"]:
-                sw_df[sw_df["__cpu"].isin(domain)]["__cpu"].plot(
-                    ax=axis, style='+', label="Task running in domain {}".format(domain))
+                df = sw_df[sw_df["__cpu"].isin(domain)]["__cpu"]
+
+                if df.empty:
+                    # Cycle the colours to stay consistent
+                    self.cycle_colors(axis, 1)
+                else:
+                    df.plot(ax=axis, style='+',
+                            label="Task running in domain {}".format(domain))
         else:
             sw_df["__cpu"].plot(ax=axis, style='+')
 
@@ -321,8 +327,7 @@
             plot_overutilized(axis=axis)
 
         # Add an extra CPU lane to make room for the legend
-        ylabels = [''] + [str(n) for n in range(self._trace.plat_info['cpus-count'])]
-        axis.set_yticklabels(ylabels)
+        axis.set_ylim(-0.95, self._trace.cpus_count - 0.05)
 
         axis.set_title("CPU residency of task \"{}\"".format(task))
         axis.set_ylabel('CPUs')
--
GitLab


From 632aa38b5e7b4f661e115fff796ab647eb71b121 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 16:00:20 +0000
Subject: [PATCH 44/56] analysis: Rework thermal analysis

---
 lisa/analysis/base.py    | 111 -------------
 lisa/analysis/thermal.py | 326 ++++++++++++++++-----------------
 2 files changed, 137 insertions(+), 300 deletions(-)

diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py
index 3400ac606..9f15fad41 100644
--- a/lisa/analysis/base.py
+++ b/lisa/analysis/base.py
@@ -189,115 +189,4 @@ class AnalysisBase(Loggable):
             raise RuntimeError(
                 "Trace is missing the following required events: {}".format(missing_events))
 
-    def _plot_generic(self, dfr, pivot, filters=None, columns=None,
-                      prettify_name=None, width=16, height=4,
-                      drawstyle="default", ax=None, title=""):
-        """
-        Generic trace plotting method
-
-        The values in the column 'pivot' will be used as discriminant
-
-        Let's consider a df with these columns:
-
-        | time | cpu | load_avg | util_avg |
-        ====================================
-        | 42   | 2   | 1812     | 400      |
-        ------------------------------------
-        | 43   | 0   | 1337     | 290      |
-        ------------------------------------
-        | ..   | ... | ..       | ..       |
-
-        To plot the 'util_avg' value of CPU2, the function would be used like so:
-        ::
-            plot_generic(df, pivot='cpu', filters={'cpu' : [2]}, columns='util_avg')
-
-        CPUs could be compared by using:
-        ::
-            plot_generic(df, pivot='cpu', filters={'cpu' : [2, 3]}, columns='util_avg')
-
-        :param dfr: Trace dataframe
-        :type dfr: `pandas.DataFrame`
-
-        :param pivot: Name of column that will serve as a pivot
-        :type pivot: str
-
-        :param filters: Dataframe column filters
-        :type filters: dict
-
-        :param columns: Name of columns whose data will be plotted
-        :type columns: str or list(str)
-
-        :param prettify_name: user-friendly stringify function for pivot values
-        :type prettify_name: callable[str]
-
-        :param width: The width of the plot
-        :type width: int
-
-        :param height: The height of the plot
-        :type height: int
-
-        :param drawstyle: The drawstyle setting of the plot
-        :type drawstyle: str
-        """
-
-        if prettify_name is None:
-            def prettify_name(name): return '{}={}'.format(pivot, name)
-
-        if pivot not in dfr.columns:
-            raise ValueError('Invalid "pivot" parameter value: no {} column'
-                             .format(pivot)
-            )
-
-        if columns is None:
-            # Find available columns
-            columns = dfr.columns.tolist()
-            columns.remove(pivot)
-        else:
-            # Filter out unwanted columns
-            columns = listify(columns)
-            try:
-                dfr = dfr[columns + [pivot]]
-            except KeyError as err:
-                raise ValueError('Invalid "columns" parameter value: {}'
-                                 .format(err.message)
-                )
-
-        # Apply filters
-        if filters is None:
-            filters = {}
-
-        for col, vals in filters.items():
-            dfr = dfr[dfr[col].isin(vals)]
-
-        setup_plot = False
-        if ax is None:
-            _, ax = self.setup_plot(width, height)
-            setup_plot = True
-
-        matches = dfr[pivot].unique().tolist()
-
-        for match in matches:
-            renamed_cols = []
-            for col in columns:
-                renamed_cols.append('{} {}'.format(prettify_name(match), col))
-
-            plot_dfr = dfr[dfr[pivot] == match][columns]
-            plot_dfr.columns = renamed_cols
-            plot_dfr.plot(ax=ax, drawstyle=drawstyle)
-
-        if setup_plot:
-            ax.set_title(title)
-
-        ax.set_xlim(self._trace.x_min, self._trace.x_max)
-
-        # Extend ylim for better visibility
-        cur_lim = ax.get_ylim()
-        lim = (cur_lim[0] - 0.1 * (cur_lim[1] - cur_lim[0]),
-               cur_lim[1] + 0.1 * (cur_lim[1] - cur_lim[0]))
-        ax.set_ylim(lim)
-
-        plt.legend()
-
-        return ax
-
 # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80
diff --git a/lisa/analysis/thermal.py b/lisa/analysis/thermal.py
index bc72f0832..011e5e0ad 100644
--- a/lisa/analysis/thermal.py
+++ b/lisa/analysis/thermal.py
@@ -15,22 +15,12 @@
 # limitations under the License.
 #
 
-""" Thermal Analysis Module """
-
-import matplotlib.gridspec as gridspec
-import matplotlib.pyplot as plt
-import pandas as pd
-import pylab as pl
-import operator
-import os
+from matplotlib.ticker import MaxNLocator
 
-from trappy.utils import listify
 from devlib.utils.misc import list_to_mask, mask_to_list
 
-from lisa.analysis.base import AnalysisBase
+from lisa.analysis.base import AnalysisBase, requires_events
 from lisa.utils import memoized
-from bart.common.Utils import area_under_curve
-from matplotlib.ticker import MaxNLocator
 
 
 class ThermalAnalysis(AnalysisBase):
@@ -38,245 +28,203 @@ class ThermalAnalysis(AnalysisBase):
     Support for plotting Thermal Analysis data
 
     :param trace: input Trace object
-    :type trace: lisa.Trace
+    :type trace: :class:`trace.Trace`
     """
 
     name = 'thermal'
 
-###############################################################################
-# Analysis properties
-###############################################################################
+    @requires_events(["thermal_temperature"])
+    def df_thermal_zones_temperature(self):
+        """
+        Get the temperature of the thermal zones
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * An ``id`` column (The thermal zone ID)
+          * A ``thermal_zone`` column (The thermal zone name)
+          * A ``temp`` column (The reported temperature)
+        """
+        df = self._trace.df_events("thermal")
+        df = df[['id', 'thermal_zone', 'temp']]
+
+        return df
+
+    @requires_events(["thermal_power_cpu_limit"])
+    def df_cpufreq_cooling_state(self, cpus=None):
+        """
+        Get cpufreq cooling device states
+
+        :param cpus: The CPUs to consider (all by default)
+        :type cpus: list(int)
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * A ``cpus`` column (The CPUs affected by the cooling device)
+          * A ``freq`` column (The frequency limit)
+          * A ``cdev_state`` column (The cooling device state index)
+
+        """
+        df = self._trace.df_events("cpu_out_power")
+        df = df[['cpus', 'freq', 'cdev_state']]
+
+        if cpus is not None:
+            # Find masks that match the requested CPUs
+            # This can include other CPUs
+            masks = self._matching_masks(cpus)
+            df = df[df.cpus.isin(masks)]
+
+        return df
+
+    @requires_events(["thermal_power_devfreq_limit"])
+    def df_devfreq_cooling_state(self, devices=None):
+        """
+        Get devfreq cooling device states
+
+        :param devices: The devfreq devices to consider (all by default)
+        :type devices: list(str)
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * A ``type`` column (The devfreq device name)
+          * A ``freq`` column (The frequency limit)
+          * A ``cdev_state`` column (The cooling device state index)
+        """
+        df = self._trace.df_events("devfreq_out_power")
+        df = df[['type', 'freq', 'cdev_state']]
+
+        if devices is not None:
+            df = df[df.type.isin(devices)]
+
+        return df
 
     @property
     @memoized
+    @requires_events(df_thermal_zones_temperature.required_events)
    def thermal_zones(self):
         """
         Get thermal zone ids that appear in the trace
         """
-        df = self._trace.df_events('thermal_temperature')
+        df = self.df_thermal_zones_temperature()
         return df["thermal_zone"].unique().tolist()
 
     @property
     @memoized
+    @requires_events(df_cpufreq_cooling_state.required_events)
    def cpufreq_cdevs(self):
         """
         Get cpufreq cooling devices that appear in the trace
         """
-        df = self._trace.df_events('thermal_power_cpu_limit')
+        df = self.df_cpufreq_cooling_state()
         res = df['cpus'].unique().tolist()
         return [mask_to_list(mask) for mask in res]
 
     @property
     @memoized
+    @requires_events(df_devfreq_cooling_state.required_events)
    def devfreq_cdevs(self):
         """
         Get devfreq cooling devices that appear in the trace
         """
-        df = self._trace.df_events('thermal_power_devfreq_limit')
+        df = self.df_devfreq_cooling_state()
         return df['type'].unique().tolist()
 
###############################################################################
-# DataFrame Getter Methods
+# Plotting Methods
###############################################################################
 
-    def df_thermal_zone_temperature(self, ids=None):
+    @requires_events(df_thermal_zones_temperature.required_events)
+    def plot_thermal_zone_temperature(self, thermal_zone_id, filepath=None, axis=None):
         """
-        Get the temperature readings of one or more thermal zone(s)
-        (all by default)
+        Plot the temperature of a given thermal zone
 
-        :param ids: The thermal zones to consider
-        :type ids: list(int)
+        :param thermal_zone_id: ID of the zone
+        :type thermal_zone_id: int
         """
-        df = self._trace.df_events('thermal_temperature')
-        df = df[['id', 'thermal_zone', 'temp']]
+        local_fig = axis is None
 
-        if ids is not None:
-            df = df[df.id.isin(ids)]
+        if local_fig:
+            fig, axis = self.setup_plot()
 
-        return df
+        df = self.df_thermal_zones_temperature()
+        df = df[df.id == thermal_zone_id]
 
-    def df_cpufreq_cooling_state(self, cpus=None):
-        """
-        Get the cooling states of one or more cpufreq cooling device(s)
-        (all by default)
+        tz_name = df.thermal_zone.unique()[0]
 
-        :param cpus: The CPUs to consider
-        :type cpus: list(int)
-        """
-        df = self._trace.df_events('thermal_power_cpu_limit')
-        df = df[['cpus', 'freq', 'cdev_state']]
+        df.temp.plot(drawstyle="steps-post", ax=axis,
+                     label="Thermal zone \"{}\"".format(tz_name))
 
-        if cpus is not None:
-            # Find masks that match the requested CPUs
-            # This can include other CPUs
-            masks = self._matching_masks(cpus)
-            df = df[df.cpus.isin(masks)]
+        axis.legend()
 
-        return df
+        if local_fig:
+            axis.grid(True)
+            axis.set_title("Temperature evolution")
+            axis.set_ylabel("Temperature (°C.10e3)")
+            axis.set_xlim(self._trace.x_min, self._trace.x_max)
+            self.save_plot(fig, filepath)
 
-    def df_devfreq_cooling_state(self, devices=None):
+        return axis
+
+    @requires_events(df_cpufreq_cooling_state.required_events)
+    def plot_cpu_cooling_states(self, cpu, filepath=None, axis=None):
         """
-        Get the cooling states of one or more devfreq cooling device(s)
-        (all by default)
+        Plot the state evolution of a cpufreq cooling device
 
-        :param devices: The devfreq devices to consider
-        :type device: list(str)
+        :param cpu: The CPU. Whole clusters can be controlled as
+            a single cooling device; it will be plotted as long as this CPU
+            belongs to the cluster.
+        :type cpu: int
         """
-        df = self._trace.df_events('thermal_power_cpu_limit')
-        df = df[['cpus', 'freq', 'cdev_state']]
+        local_fig = axis is None
 
-        if cpus is not None:
-            # Find masks that match the requested CPUs
-            # This can include other CPUs
-            masks = self._matching_masks(cpus)
-            df = df[df.cpus.isin(masks)]
+        if local_fig:
+            fig, axis = self.setup_plot()
 
-        return df
+        df = self.df_cpufreq_cooling_state([cpu])
+        cdev_name = "CPUs {}".format(mask_to_list(df.cpus.unique()[0]))
 
+        df.cdev_state.plot(drawstyle="steps-post", ax=axis,
+                           label="\"{}\"".format(cdev_name))
 
-###############################################################################
-# Plotting Methods
-###############################################################################
+        axis.legend()
 
-    def plot_temperature(self, thermal_zones=None, ax=None):
-        """
-        Plot temperature of thermal zones (all by default)
+        if local_fig:
+            axis.grid(True)
+            axis.set_title("cpufreq cooling devices status")
+            axis.yaxis.set_major_locator(MaxNLocator(integer=True))
+            axis.grid(axis='y')
+            axis.set_xlim(self._trace.x_min, self._trace.x_max)
+            self.save_plot(fig, filepath)
 
-        Requires the following trace event:
-            - thermal_temperature
+        return axis
 
-        :param thermal_zones: ID(s) of the zones to be plotted.
-            All the zones are plotted by default.
-            IDs can be found in syfs: /sys/class/thermal/thermal_zone
-        :type thermal_zones: list(int)
-        """
-        if not self._trace.hasEvents('thermal_temperature'):
-            self.get_logger().warning('Event [{}] not found, plot DISABLED!'
-                                      .format('thermal_temperature'))
-            return
-
-        plot_df = self.df_thermal_zone_temperature(thermal_zones)
-
-        def stringify_tz(id):
-            return plot_df[plot_df.id == id]['thermal_zone'].unique()[0]
-
-        filters = None if thermal_zones is None else {'thermal_zone' : thermal_zones}
-        self._plot_generic(plot_df, 'id', filters=filters, columns=['temp'],
-                           prettify_name=stringify_tz,
-                           drawstyle='steps-post', ax=ax
-        )
-
-        if thermal_zones is None:
-            suffix = ''
-        else:
-            suffix = '_' + '_'.join(map(str, thermal_zones))
-
-        # Save generated plots into datadir
-        figname = os.path.join(
-            self._trace.plots_dir,
-            '{}thermal_temperature{}.png'.format(
-                self._trace.plots_dir, self._trace.plots_prefix, suffix
-            )
-        )
-
-        pl.savefig(figname, bbox_inches='tight')
-
-    def plot_cpu_cooling_states(self, cpus=None, ax=None):
+    def plot_dev_freq_cooling_states(self, device, filepath=None, axis=None):
         """
-        Plot the state evolution of cpufreq cooling devices (all by default)
+        Plot the state evolution of a devfreq cooling device
 
-        Requires the following trace event:
-            - thermal_power_cpu_limit
-
-        :param cpus: list of CPUs to plot. Whole clusters can be controlled as
-            a single cooling device, they will be plotted as long as one of their
-            CPUs is in the list.
-        :type cpus: list(int)
+        :param device: The devfreq device to consider
+        :type device: str
         """
-        if not self._trace.hasEvents('thermal_power_cpu_limit'):
-            self.get_logger().warning('Event [{}] not found, plot DISABLED!'
- .format('thermal_power_cpu_limit')) - return + local_fig = axis is None - plot_df = self._trace.df_events('thermal_power_cpu_limit') + if local_fig: + fig, axis = self.setup_plot() - def stringify_mask(mask): - return 'CPUs {}'.format(mask_to_list(mask)) + df = self.df_devfreq_cooling_state([device]) - # Find masks that match the requested CPUs - # This can include other CPUs - masks = None - if cpus is not None: - masks = self._matching_masks(cpus) + df.cdev_state.plot(drawstyle="steps-post", ax=axis, + label="Device \"{}\"".format(device)) - filters = None if masks is None else {'cpus' : masks} - _ax = self._plot_generic(plot_df, 'cpus', filters=filters, columns=['cdev_state'], - prettify_name=stringify_mask, - drawstyle='steps-post', ax=ax - ) - - if ax is None: - ax = _ax - - # Cdev status is an integer series - ax.yaxis.set_major_locator(MaxNLocator(integer=True)) - ax.grid(axis='y') - - if cpus is None: - suffix = '' - else: - suffix = '_' + '_'.join(map(str, cpus)) - - # Save generated plots into datadir - figname = os.path.join( - self._trace.plots_dir, - '{}thermal_cpufreq_cdev_state{}.png'.format( - self._trace.plots_dir, self._trace.plots_prefix, suffix - ) - ) - pl.savefig(figname, bbox_inches='tight') - - def plot_dev_freq_cooling_states(self, devices=None, ax=None): - """ - Plot the state evolution of devfreq cooling devices (all by default) + axis.legend() - Requires the following trace event: - - thermal_power_devfreq_limit + if local_fig: + axis.grid(True) + axis.set_title("devfreq cooling devices status") + axis.yaxis.set_major_locator(MaxNLocator(integer=True)) + axis.grid(axis='y') + axis.set_xlim(self._trace.x_min, self._trace.x_max) + self.save_plot(fig, filepath) - :param devices: list of devfreq devices to plot. - :type cpus: list(int) - """ - if not self._trace.hasEvents('thermal_power_devfreq_limit'): - self.get_logger().warning('Event [{}] not found, plot DISABLED!' 
-                                      .format('thermal_power_devfreq_limit'))
-            return
-
-        plot_df = self._trace.df_events('thermal_power_devfreq_limit')
-
-        # Might have more than one device selected by 'type', but that's
-        # the best we can do
-        filters = None if devices is None else {'type' : devices}
-        _ax = self._plot_generic(plot_df, 'type', filters=filters, columns=['cdev_state'],
-                           drawstyle='steps-post', ax=ax
-                           )
-
-        if ax is None:
-            ax = _ax
-
-        # Cdev status is an integer series
-        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
-        ax.grid(axis='y')
-
-        if devices is None:
-            suffix = ''
-        else:
-            suffix = '_' + '_'.join(map(str, devices))
-
-        # Save generated plots into datadir
-        figname = os.path.join(
-            self._trace.plots_dir,
-            '{}thermal_devfreq_cdev_state{}.png'.format(
-                self._trace.plots_dir, self._trace.plots_prefix, suffix
-            )
-        )
-        pl.savefig(figname, bbox_inches='tight')
+        return axis
 
 ###############################################################################
 # Utility Methods
-- 
GitLab


From bf987faf717902fb76898099267a25e2de83cd7b Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 16:11:49 +0000
Subject: [PATCH 45/56] analysis: Use trace.setXTimeRange() where relevant

---
 lisa/analysis/frequency.py     | 1 +
 lisa/analysis/load_tracking.py | 1 +
 lisa/analysis/tasks.py         | 5 +++++
 3 files changed, 7 insertions(+)

diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py
index cb6cae7f6..863f115b5 100644
--- a/lisa/analysis/frequency.py
+++ b/lisa/analysis/frequency.py
@@ -359,6 +359,7 @@ class FrequencyAnalysis(AnalysisBase):
             self.plot_cpu_frequencies(domain[0], filepath, axis)
 
             axis.set_title('Frequencies of CPUs {}'.format(domain))
+            axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
         self.save_plot(fig, filepath)
 
diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py
index 229c1d5c4..1710a8d3a 100644
--- a/lisa/analysis/load_tracking.py
+++ b/lisa/analysis/load_tracking.py
@@ -245,6 +245,7 @@ class LoadTrackingAnalysis(AnalysisBase):
         axis.set_title('Load-tracking signals of task "{}"'.format(task))
         axis.legend()
         axis.grid(True)
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
         self.save_plot(fig, filepath)
 
         return axis
diff --git a/lisa/analysis/tasks.py b/lisa/analysis/tasks.py
index 3651dd389..fd04080d9 100644
--- a/lisa/analysis/tasks.py
+++ b/lisa/analysis/tasks.py
@@ -337,6 +337,7 @@ class TasksAnalysis(AnalysisBase):
         axis.set_ylabel('CPUs')
         axis.grid(True)
         axis.legend()
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
         self.save_plot(fig, filepath)
 
@@ -413,6 +414,7 @@
         df.plot(ax=axis, legend=False)
 
         axis.set_title("Number of task wakeups within {}s windows".format(time_delta))
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
         self.save_plot(fig, filepath)
 
@@ -436,6 +438,7 @@
             df.index, df.target_cpu, xbins, "Number of wakeups", cmap=colormap)
 
         axis.set_title("Tasks wakeups over time")
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
         self.save_plot(fig, filepath)
 
@@ -464,6 +467,7 @@
         df.plot(ax=axis, legend=False)
 
         axis.set_title("Number of task forks within {}s windows".format(time_delta))
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
         self.save_plot(fig, filepath)
 
@@ -487,6 +491,7 @@
             df.index, df.target_cpu, xbins, "Number of forks", cmap=colormap)
 
         axis.set_title("Tasks forks over time")
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
 
         self.save_plot(fig, filepath)
 
-- 
GitLab

From bab67acd1bcaee141ee4ff1ff1d210e2553df841 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 18:59:54 +0000
Subject: [PATCH 46/56] analysis/load_tracking: Remove stray PID sidecheck

---
 lisa/analysis/load_tracking.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py
index 1710a8d3a..1691fe5f3 100644
--- a/lisa/analysis/load_tracking.py
+++ b/lisa/analysis/load_tracking.py
@@ -223,16 +223,9 @@ class LoadTrackingAnalysis(AnalysisBase):
         """
         fig, axis = self.setup_plot()
 
-        if isinstance(task, str):
-            pid_list = self._trace.get_task_by_name(task)
-            if len(pid_list) > 1:
-                self.get_logger().warning(
-                    "More than one PID found for task {}, using the first one".format(task))
-            pid = pid_list[0]
-        else:
-            pid = task
-
         df = self.df_tasks_signals()
+
+        pid = self._trace.get_task_pid(task)
         df = df[df.pid == pid]
 
         df[['util']].plot(ax=axis, drawstyle='steps-post', alpha=0.4)
-- 
GitLab


From d4776edb36c619484a9f5af9ffc53c7aaf3321c3 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 17:21:44 +0000
Subject: [PATCH 47/56] trace: Fix plots_dir setting if data_dir='foo/trace.dat'

---
 lisa/analysis/base.py | 2 +-
 lisa/trace.py         | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py
index 9f15fad41..a0cafa01b 100644
--- a/lisa/analysis/base.py
+++ b/lisa/analysis/base.py
@@ -182,7 +182,7 @@ class AnalysisBase(Loggable):
 
         :raises: RuntimeError if some events are not available
         """
-        available_events = sorted(set(self._trace.events))
+        available_events = sorted(set(self._trace.available_events))
 
         missing_events = sorted(set(required_events).difference(available_events))
         if missing_events:
diff --git a/lisa/trace.py b/lisa/trace.py
index 306e1b610..86a08cc8b 100644
--- a/lisa/trace.py
+++ b/lisa/trace.py
@@ -125,7 +125,12 @@ class Trace(Loggable):
         # By default, use the trace dir to save plots
         self.plots_dir = plots_dir
         if self.plots_dir is None:
-            self.plots_dir = self.data_dir
+            # In case we're passed the trace.dat
+            if os.path.isfile(data_dir):
+                self.plots_dir = os.path.dirname(data_dir)
+            else:
+                self.plots_dir = data_dir
+
         self.plots_prefix = plots_prefix
 
         self.__registerTraceEvents(events)
-- 
GitLab


From 797cf62b12138aa9b9ab12f3fe2fce313503bacc Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 17:27:41 +0000
Subject: [PATCH 48/56] analysis/load_tracking: Fix signal uniformization

---
 lisa/analysis/load_tracking.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py
index 1691fe5f3..6e2f59b54 100644
--- a/lisa/analysis/load_tracking.py
+++ b/lisa/analysis/load_tracking.py
@@ -56,12 +56,15 @@ class LoadTrackingAnalysis(AnalysisBase):
         if event in ['sched_load_cfs_rq', 'sched_load_se']:
             return ['path', 'rbl_load', 'cpu']
 
+        if event in ['sched_load_avg_task']:
+            return ['load_sum', 'period_contrib', 'util_sum']
+
         return []
 
     def _df_uniformized_signal(self, event):
         df = self._trace.df_events(event)
 
-        df = df.rename(self._columns_renaming(event))
+        df = df.rename(columns=self._columns_renaming(event))
 
         if event == 'sched_load_se':
             df = df[df.path == "(null)"]
@@ -70,15 +73,14 @@
             df = df[df.path == "/"]
 
         to_drop = self._columns_to_drop(event)
-        if to_drop:
-            df = df.drop(to_drop, axis=1)
+        df = df[[col for col in df.columns if col not in to_drop]]
 
         return df
 
     def _df_either_event(self,
events): for event in events: - if event not in self._trace.events: + if event not in self._trace.available_events: continue return self._df_uniformized_signal(event) -- GitLab From 8f6bf1cde20dabe220a04ad200b6c7900515328e Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 30 Nov 2018 17:28:15 +0000 Subject: [PATCH 49/56] tests/lisa: Update test_trace regarding analysis reworks --- lisa/tests/lisa/test_trace.py | 51 +-- lisa/tests/lisa/traces/plat_info.yml | 372 +++++++++++++++-- .../lisa/traces/sched_load/plat_info.yml | 373 ++++++++++++++++-- .../lisa/traces/sched_load_avg/plat_info.yml | 41 +- 4 files changed, 738 insertions(+), 99 deletions(-) diff --git a/lisa/tests/lisa/test_trace.py b/lisa/tests/lisa/test_trace.py index 7a9791e3e..4cea9bb79 100644 --- a/lisa/tests/lisa/test_trace.py +++ b/lisa/tests/lisa/test_trace.py @@ -79,23 +79,23 @@ class TestTrace(StorageTestCase): path = os.path.join(trace_dir, 'plat_info.yml') return PlatformInfo.from_yaml_map(path) - def test_getTaskByName(self): - """TestTrace: getTaskByName() returns the list of PIDs for all tasks with the specified name""" + def test_get_task_by_name(self): + """TestTrace: get_task_by_name() returns the list of PIDs for all tasks with the specified name""" for name, pids in [('watchdog/0', [12]), ('sh', [1642, 1702, 1717, 1718]), ('NOT_A_TASK', [])]: - self.assertEqual(self.trace.getTaskByName(name), pids) + self.assertEqual(self.trace.get_task_by_name(name), pids) - def test_getTaskByPid(self): - """TestTrace: getTaskByPid() returns the name of the task with the specified PID""" + def test_get_task_by_pid(self): + """TestTrace: get_task_by_pid() returns the name of the task with the specified PID""" for pid, names in [(15, 'watchdog/1'), (1639, 'sshd'), (987654321, None)]: - self.assertEqual(self.trace.getTaskByPid(pid), names) + self.assertEqual(self.trace.get_task_by_pid(pid), names) - def test_getTasks(self): - """TestTrace: getTasks() returns a dictionary mapping PIDs to a single task name""" - tasks_dict = self.trace.getTasks() + def test_get_tasks(self): + """TestTrace: get_tasks() returns a dictionary mapping PIDs to a single task name""" + tasks_dict = self.trace.get_tasks() for pid, name in [(1, 'init'), (9, 'rcu_sched'), (1383, 'jbd2/sda2-8')]: @@ -109,9 +109,9 @@ class TestTrace(StorageTestCase): """ trace = self.make_trace(in_data) - self.assertEqual(trace.getTaskByPid(1234), 'father') - self.assertEqual(trace.getTaskByPid(5678), 'child') - self.assertEqual(trace.getTaskByName('father'), [1234]) + self.assertEqual(trace.get_task_by_pid(1234), 'father') + self.assertEqual(trace.get_task_by_pid(5678), 'child') + self.assertEqual(trace.get_task_by_name('father'), [1234]) def test_time_range(self): """ @@ -242,7 +242,7 @@ class TestTrace(StorageTestCase): """ trace = self.make_trace(in_data) - trace.analysis.idle.plot_cpu_idle_state_residency() + trace.analysis.idle.plot_cpu_idle_state_residency(0) def test_deriving_cpus_count(self): """Test that Trace derives cpus_count if it isn't provided""" @@ -259,7 +259,7 @@ class TestTrace(StorageTestCase): self.assertEqual(trace.cpus_count, 3) - def test_df_cpu_wakeups(self): + def test_df_cpus_wakeups(self): """ Test the cpu_wakeups DataFrame getter """ @@ -276,14 +276,14 @@ class TestTrace(StorageTestCase): -0 [004] 519.023080: cpu_idle: state=1 cpu_id=4 """) - df = trace.analysis.cpus.df_cpu_wakeups() + df = trace.analysis.idle.df_cpus_wakeups() exp_index=[519.021928, 519.022641, 519.022642, 519.022643, 519.022867] exp_cpus= [ 4, 4, 1, 2, 3] 
self.assertListEqual(df.index.tolist(), exp_index) self.assertListEqual(df.cpu.tolist(), exp_cpus) - df = trace.analysis.cpus.df_cpu_wakeups([2]) + df = df[df.cpu == 2] self.assertListEqual(df.index.tolist(), [519.022643]) self.assertListEqual(df.cpu.tolist(), [2]) @@ -292,29 +292,18 @@ class TestTrace(StorageTestCase): """Helper for smoke testing _dfg methods in tasks_analysis""" trace = self.get_trace(trace_name) - lt_df = trace.analysis.tasks.df_load() - columns = ['comm', 'pid', 'load_avg', 'util_avg', 'cpu'] - if trace.has_big_little: - columns += ['cluster'] - if 'nrg-model' in trace.plat_info: - columns += ['min_cluster_cap'] + lt_df = trace.analysis.load_tracking.df_tasks_signals() + columns = ['comm', 'pid', 'load', 'util', '__cpu'] for column in columns: msg = 'Task signals parsed from {} missing {} column'.format( trace.data_dir, column) self.assertIn(column, lt_df, msg=msg) - if trace.has_big_little: - df = trace.analysis.tasks.df_top_big_tasks(min_samples=1) - for column in ['samples', 'comm']: - msg = 'Big tasks parsed from {} missing {} column'.format( - trace.data_dir, column) - self.assertIn(column, df, msg=msg) - # Pick an arbitrary PID to try plotting signals for. pid = lt_df['pid'].unique()[0] # Call plot - although we won't check the results we can just check # that things aren't totally borken. - trace.analysis.tasks.plot_tasks(tasks=[pid]) + trace.analysis.load_tracking.plot_task_signals(pid) def test_sched_load_signals(self): """Test parsing sched_load_se events from EAS upstream integration""" @@ -364,7 +353,7 @@ class TestTraceNoClusterData(TestTrace): def _get_plat_info(self, trace_name=None): plat_info = super(TestTraceNoClusterData, self)._get_plat_info(trace_name) plat_info = copy.copy(plat_info) - plat_info.force_src('clusters', ['SOURCE THAT DOES NOT EXISTS']) + plat_info.force_src('freq-domains', ['SOURCE THAT DOES NOT EXISTS']) return plat_info class TestTraceNoPlatform(TestTrace): diff --git a/lisa/tests/lisa/traces/plat_info.yml b/lisa/tests/lisa/traces/plat_info.yml index 80fd5d1a7..e84ef5dae 100644 --- a/lisa/tests/lisa/traces/plat_info.yml +++ b/lisa/tests/lisa/traces/plat_info.yml @@ -1,27 +1,355 @@ platform-info: conf: - os: linux abi: arm64 - clusters: - big: [1, 2] - little: [0, 3, 4, 5] + cpu-capacities: + 0: 446 + 1: 1024 + 2: 1024 + 3: 446 + 4: 446 + 5: 446 cpus-count: 6 - topology: !call:trappy.stats.Topology.Topology - clusters: [[0, 3, 4, 5], [1, 2]] + freq-domains: + - - 0 + - 3 + - 4 + - 5 + - - 1 + - 2 freqs: - big: [ - 450000, - 625000, - 800000, - 950000, - 1100000 - ] - little: [ - 450000, - 575000, - 700000, - 775000, - 850000 - ] - kernel-version: !call:devlib.target.KernelVersion - version_string: "4.11.0-rc6-00091-g48e946ac4235-dirty #67 SMP PREEMPT Mon Jul 3 16:23:08 BST 2017" + 0: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 1: + - 450000 + - 625000 + - 800000 + - 950000 + - 1100000 + 2: + - 450000 + - 625000 + - 800000 + - 950000 + - 1100000 + 3: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 4: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 5: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + kernel-version: !!python/object:devlib.target.KernelVersion + major: 19 + minor: 0 + parts: !!python/tuple + - 4 + - 19 + - 0 + rc: null + release: 4.19.0-07801-gf317706 + sha1: f317706 + version: 38 SMP PREEMPT Fri Nov 30 13:55:54 GMT 2018 + version_number: 4 + name: juno + nrg-model: !!python/object:lisa.energy_model.EnergyModel + cpu_nodes: + - &id001 
!!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 0 + cpus: !!python/tuple + - 0 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu0 + parent: &id002 !!python/object:lisa.energy_model.EnergyModelRoot + active_states: null + children: + - *id001 + - &id003 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 418.90909090909093 + - 160 + - 625000: !!python/object/new:lisa.energy_model.ActiveState + - 581.8181818181819 + - 239 + - 800000: !!python/object/new:lisa.energy_model.ActiveState + - 744.7272727272727 + - 343 + - 950000: !!python/object/new:lisa.energy_model.ActiveState + - 884.3636363636364 + - 454 + - 1100000: !!python/object/new:lisa.energy_model.ActiveState + - 1024.0 + - 583 + children: [] + cpu: 1 + cpus: !!python/tuple + - 1 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu1 + parent: *id002 + - &id004 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 418.90909090909093 + - 160 + - 625000: !!python/object/new:lisa.energy_model.ActiveState + - 581.8181818181819 + - 239 + - 800000: !!python/object/new:lisa.energy_model.ActiveState + - 744.7272727272727 + - 343 + - 950000: !!python/object/new:lisa.energy_model.ActiveState + - 884.3636363636364 + - 454 + - 1100000: !!python/object/new:lisa.energy_model.ActiveState + - 1024.0 + - 583 + children: [] + cpu: 2 + cpus: !!python/tuple + - 2 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu2 + parent: *id002 + - &id005 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 3 + cpus: !!python/tuple + - 3 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu3 + parent: *id002 + - &id006 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 4 + cpus: !!python/tuple + - 4 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu4 + parent: *id002 + - &id007 
!!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 5 + cpus: !!python/tuple + - 5 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu5 + parent: *id002 + cpu: null + cpus: &id015 !!python/tuple + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + idle_states: null + name: null + parent: null + - *id003 + - *id004 + - *id005 + - *id006 + - *id007 + cpu_pds: + - &id008 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 0 + cpus: !!python/tuple + - 0 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: &id009 !!python/object:lisa.energy_model.PowerDomain + children: + - *id008 + - &id010 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 1 + cpus: !!python/tuple + - 1 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id011 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 2 + cpus: !!python/tuple + - 2 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id012 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 3 + cpus: !!python/tuple + - 3 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id013 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 4 + cpus: !!python/tuple + - 4 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id014 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 5 + cpus: !!python/tuple + - 5 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + cpu: null + cpus: !!python/tuple + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + idle_states: [] + name: null + parent: null + - *id010 + - *id011 + - *id012 + - *id013 + - *id014 + cpus: *id015 + freq_domains: + - - 1 + - 2 + - - 0 + - 3 + - 4 + - 5 + pd: *id009 + root: *id002 + os: linux + rtapp: + calib: + 0: 660 + 1: 313 + 2: 313 + 3: 660 + 4: 660 + 5: 661 diff --git a/lisa/tests/lisa/traces/sched_load/plat_info.yml b/lisa/tests/lisa/traces/sched_load/plat_info.yml index b1af46625..e84ef5dae 100644 --- a/lisa/tests/lisa/traces/sched_load/plat_info.yml +++ b/lisa/tests/lisa/traces/sched_load/plat_info.yml @@ -1,28 +1,355 @@ platform-info: conf: - os: linux abi: arm64 - clusters: - big: [1, 2] - little: [0, 3, 4, 5] + cpu-capacities: + 0: 446 + 1: 1024 + 2: 1024 + 3: 446 + 4: 446 + 5: 446 cpus-count: 6 + freq-domains: + - - 0 + - 3 + - 4 + - 5 + - - 1 + - 2 freqs: - big: [ - 450000, - 625000, - 800000, - 950000, - 1100000 - ] - little: [ - 450000, - 575000, - 700000, - 775000, - 850000 - ] - kernel-version: !call:devlib.target.KernelVersion - version_string: "4.11.0-rc6-00092-g9cc3141d9e4f-dirty #58 SMP PREEMPT Wed May 24 18:37:50 BST 2017" - - topology: !call:trappy.stats.Topology.Topology - clusters: [[0, 3, 4, 5], [1, 2]] + 0: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 1: + - 450000 + - 625000 + - 800000 + - 950000 + - 1100000 + 2: + - 450000 + - 625000 + - 800000 + - 950000 + - 1100000 
+ 3: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 4: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 5: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + kernel-version: !!python/object:devlib.target.KernelVersion + major: 19 + minor: 0 + parts: !!python/tuple + - 4 + - 19 + - 0 + rc: null + release: 4.19.0-07801-gf317706 + sha1: f317706 + version: 38 SMP PREEMPT Fri Nov 30 13:55:54 GMT 2018 + version_number: 4 + name: juno + nrg-model: !!python/object:lisa.energy_model.EnergyModel + cpu_nodes: + - &id001 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 0 + cpus: !!python/tuple + - 0 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu0 + parent: &id002 !!python/object:lisa.energy_model.EnergyModelRoot + active_states: null + children: + - *id001 + - &id003 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 418.90909090909093 + - 160 + - 625000: !!python/object/new:lisa.energy_model.ActiveState + - 581.8181818181819 + - 239 + - 800000: !!python/object/new:lisa.energy_model.ActiveState + - 744.7272727272727 + - 343 + - 950000: !!python/object/new:lisa.energy_model.ActiveState + - 884.3636363636364 + - 454 + - 1100000: !!python/object/new:lisa.energy_model.ActiveState + - 1024.0 + - 583 + children: [] + cpu: 1 + cpus: !!python/tuple + - 1 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu1 + parent: *id002 + - &id004 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 418.90909090909093 + - 160 + - 625000: !!python/object/new:lisa.energy_model.ActiveState + - 581.8181818181819 + - 239 + - 800000: !!python/object/new:lisa.energy_model.ActiveState + - 744.7272727272727 + - 343 + - 950000: !!python/object/new:lisa.energy_model.ActiveState + - 884.3636363636364 + - 454 + - 1100000: !!python/object/new:lisa.energy_model.ActiveState + - 1024.0 + - 583 + children: [] + cpu: 2 + cpus: !!python/tuple + - 2 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu2 + parent: *id002 + - &id005 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 3 + cpus: !!python/tuple + - 3 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu3 + parent: *id002 + - &id006 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 
236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 4 + cpus: !!python/tuple + - 4 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu4 + parent: *id002 + - &id007 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 5 + cpus: !!python/tuple + - 5 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu5 + parent: *id002 + cpu: null + cpus: &id015 !!python/tuple + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + idle_states: null + name: null + parent: null + - *id003 + - *id004 + - *id005 + - *id006 + - *id007 + cpu_pds: + - &id008 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 0 + cpus: !!python/tuple + - 0 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: &id009 !!python/object:lisa.energy_model.PowerDomain + children: + - *id008 + - &id010 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 1 + cpus: !!python/tuple + - 1 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id011 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 2 + cpus: !!python/tuple + - 2 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id012 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 3 + cpus: !!python/tuple + - 3 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id013 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 4 + cpus: !!python/tuple + - 4 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id014 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 5 + cpus: !!python/tuple + - 5 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + cpu: null + cpus: !!python/tuple + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + idle_states: [] + name: null + parent: null + - *id010 + - *id011 + - *id012 + - *id013 + - *id014 + cpus: *id015 + freq_domains: + - - 1 + - 2 + - - 0 + - 3 + - 4 + - 5 + pd: *id009 + root: *id002 + os: linux + rtapp: + calib: + 0: 660 + 1: 313 + 2: 313 + 3: 660 + 4: 660 + 5: 661 diff --git a/lisa/tests/lisa/traces/sched_load_avg/plat_info.yml b/lisa/tests/lisa/traces/sched_load_avg/plat_info.yml index 23578d8c8..807720c99 100644 --- a/lisa/tests/lisa/traces/sched_load_avg/plat_info.yml +++ b/lisa/tests/lisa/traces/sched_load_avg/plat_info.yml @@ -2,20 +2,18 @@ platform-info: conf: os: android abi: arm64 - clusters: - big: [2, 3] - little: [0, 1] cpus-count: 4 + freq-domains: [[0, 1], [2, 3]] freqs: - big: [ + 0: [ 307200, 384000, 460800, 537600, 614400, 691200, - 748800, - 825600, + 
768000, + 844800, 902400, 979200, 1056000, @@ -25,24 +23,17 @@ platform-info: 1363200, 1440000, 1516800, - 1593600, - 1670400, - 1747200, - 1824000, - 1900800, - 1977600, - 2054400, - 2150400 - ] - little: [ + 1593600 + ] + 1: [ 307200, 384000, 460800, 537600, 614400, 691200, - 768000, - 844800, + 748800, + 825600, 902400, 979200, 1056000, @@ -52,10 +43,14 @@ platform-info: 1363200, 1440000, 1516800, - 1593600 - ] + 1593600, + 1670400, + 1747200, + 1824000, + 1900800, + 1977600, + 2054400, + 2150400 + ] kernel-version: !call:devlib.target.KernelVersion version_string: "3.18.31-gbd96fbf #1 SMP PREEMPT Mon Nov 7 20:29:14 UTC 2016" - - topology: !call:trappy.stats.Topology.Topology - clusters: [[0, 1], [2, 3]] -- GitLab From 59e170e03776788a67311f2b73413b9b5030b98f Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 30 Nov 2018 18:33:42 +0000 Subject: [PATCH 50/56] tests/kernel: Update tests regarding analysis rework --- lisa/tests/kernel/scheduler/eas_behaviour.py | 4 +- lisa/tests/kernel/scheduler/load_tracking.py | 39 +++--------- lisa/tests/kernel/scheduler/misfit.py | 66 ++++++++++---------- 3 files changed, 43 insertions(+), 66 deletions(-) diff --git a/lisa/tests/kernel/scheduler/eas_behaviour.py b/lisa/tests/kernel/scheduler/eas_behaviour.py index f5e6ecb48..efb382e6a 100644 --- a/lisa/tests/kernel/scheduler/eas_behaviour.py +++ b/lisa/tests/kernel/scheduler/eas_behaviour.py @@ -127,8 +127,8 @@ class EASBehaviour(RTATestBundle, abc.ABC): start_time = self.trace.start_time + self.trace.time_range for task in tasks: - pid = self.trace.getTaskByName(task) - assert len(pid) == 1, "getTaskByName returned more than one PID" + pid = self.trace.get_task_by_name(task) + assert len(pid) == 1, "get_task_by_name returned more than one PID" pid = pid[0] start_time = min(start_time, sdf[sdf.next_pid == pid].index[0]) diff --git a/lisa/tests/kernel/scheduler/load_tracking.py b/lisa/tests/kernel/scheduler/load_tracking.py index 9e1eb350f..b926451d4 100644 --- a/lisa/tests/kernel/scheduler/load_tracking.py +++ b/lisa/tests/kernel/scheduler/load_tracking.py @@ -154,34 +154,15 @@ class LoadTrackingBase(RTATestBundle, LoadTrackingHelpers): :returns: :class:`pandas.DataFrame` with a column for each signal for the workload task """ - # There are two different scheduler trace events that expose the load - # tracking signals. Neither of them is in mainline. Eventually they - # should be unified but for now we'll just check for both types of - # event. - # TODO: Add support for this parsing in Trappy and/or tasks_analysis - signal_fields = signals - if 'sched_load_avg_task' in trace.available_events: - event = 'sched_load_avg_task' - elif 'sched_load_se' in trace.available_events: - event = 'sched_load_se' - # sched_load_se uses 'util' and 'load' instead of 'util_avg' and - # 'load_avg' - signal_fields = [s.replace('_avg', '') for s in signals] - elif 'sched_pelt_se' in trace.available_events: - event = 'sched_pelt_se' - else: - raise ValueError('No sched_load_avg_task or sched_load_se or sched_pelt_se events. 
' 'Does the kernel support them?')
-
-        df = trace.df_events(event)
-        df = df[df['comm'] == task_name][signal_fields]
+        df = trace.analysis.load_tracking.df_tasks_signals()
+        df = df[df['comm'] == task_name]
 
         window = self.get_task_window(trace, task_name, cpu)
         df = select_window(df, window)
 
         # Normalize the signal with the detected task execution start
         df.index -= window[0]
 
-        return df.rename(columns=dict(zip(signal_fields, signals)))
+        return df
 
     @staticmethod
     def is_almost_equal(target, value, allowed_delta_pct):
@@ -271,7 +252,7 @@
             expected values
         :type allowed_error_pct: float
         """
-        return self._test_signal('util_avg', allowed_error_pct)
+        return self._test_signal('util', allowed_error_pct)
 
 class CpuInvariance(InvarianceBase):
     """
@@ -396,7 +377,7 @@ class FreqInvarianceItem(InvarianceBase):
             expected values
         :type allowed_error_pct: float
         """
-        return self._test_signal('load_avg', allowed_error_pct)
+        return self._test_signal('load', allowed_error_pct)
 
 class FreqInvariance(TestBundle, LoadTrackingHelpers):
@@ -715,7 +696,7 @@ class PELTTask(LoadTrackingBase):
 
         :param allowed_error_pct: The allowed range difference
        """
-        return self._test_range('util_avg', allowed_error_pct)
+        return self._test_range('util', allowed_error_pct)
 
     def test_load_avg_range(self, allowed_error_pct=15) -> ResultBundle:
         """
@@ -723,7 +704,7 @@
 
         :param allowed_error_pct: The allowed range difference
         """
-        return self._test_range('load_avg', allowed_error_pct)
+        return self._test_range('load', allowed_error_pct)
 
     def test_util_avg_behaviour(self, error_margin_pct=5, allowed_error_pct=5)\
         -> ResultBundle:
         """
@@ -736,7 +717,7 @@
 
         :param allowed_error_pct: How many PELT errors (determined by
           ``error_margin_pct``) are allowed
         """
-        return self._test_behaviour('util_avg', error_margin_pct, allowed_error_pct)
+        return self._test_behaviour('util', error_margin_pct, allowed_error_pct)
 
     def test_load_avg_behaviour(self, error_margin_pct=5, allowed_error_pct=5)\
         -> ResultBundle:
         """
@@ -749,6 +730,4 @@
 
         :param allowed_error_pct: How many PELT errors (determined by
           ``error_margin_pct``) are allowed
         """
-        return self._test_behaviour('load_avg', error_margin_pct, allowed_error_pct)
-
-
+        return self._test_behaviour('load', error_margin_pct, allowed_error_pct)
diff --git a/lisa/tests/kernel/scheduler/misfit.py b/lisa/tests/kernel/scheduler/misfit.py
index beb7ea239..a10c3d873 100644
--- a/lisa/tests/kernel/scheduler/misfit.py
+++ b/lisa/tests/kernel/scheduler/misfit.py
@@ -24,6 +24,7 @@ from lisa.trace import Trace
 from lisa.wlgen.rta import Periodic
 from lisa.tests.kernel.test_bundle import RTATestBundle, Result, ResultBundle, CannotCreateError, TestMetric
 from lisa.env import TestEnv
+from lisa.analysis.tasks import TaskState
 
 class MisfitMigrationBase(RTATestBundle):
     """
@@ -202,14 +203,12 @@
 
         return profile
 
-    def _trim_lat_df(self, lat_df):
-        if lat_df.empty:
-            return lat_df
+    def _trim_state_df(self, state_df):
+        if state_df.empty:
+            return state_df
 
-        lat_df = Trace.squash_df(lat_df, self.start_time,
-                                 lat_df.index[-1] + lat_df.t_delta.values[-1], "t_delta")
-        # squash_df only updates t_delta, remove t_start to make sure it's not used
-        return lat_df.drop('t_start', 1)
+        return Trace.squash_df(state_df, self.start_time,
+                               state_df.index[-1] + state_df.delta.values[-1], "delta")
 
     def test_preempt_time(self, allowed_preempt_pct=1) -> ResultBundle:
         """
@@
-217,13 +216,13 @@ class StaggeredFinishes(MisfitMigrationBase): """ sdf = self.trace.df_events('sched_switch') - latency_dfs = { - task : self.trace.analysis.latency.df_latency(task) + task_state_dfs = { + task : self.trace.analysis.tasks.df_task_states(task) for task in self.rtapp_profile.keys() } res = ResultBundle.from_bool(True) - for task, lat_df in latency_dfs.items(): + for task, state_df in task_state_dfs.items(): # The sched_switch dataframe where the misfit task # is replaced by another misfit task preempt_sdf = sdf[ @@ -231,15 +230,15 @@ class StaggeredFinishes(MisfitMigrationBase): (sdf.next_comm.str.startswith(self.task_prefix)) ] - lat_df = self._trim_lat_df( - lat_df[ - (lat_df.index.isin(preempt_sdf.index)) & + state_df = self._trim_state_df( + state_df[ + (state_df.index.isin(preempt_sdf.index)) & # Ensure this is a preemption and not just the task ending - (lat_df.curr_state == "S") + (state_df.curr_state == TaskState.TASK_INTERRUPTIBLE.char) ] ) - preempt_time = lat_df.t_delta.sum() + preempt_time = state_df.delta.sum() preempt_pct = (preempt_time / self.duration) * 100 res.add_metric("{} preemption".format(task), { @@ -282,17 +281,17 @@ class StaggeredFinishes(MisfitMigrationBase): return max_time, max_cpu - def _test_cpus_busy(self, latency_dfs, cpus, allowed_idle_time_s): + def _test_cpus_busy(self, task_state_dfs, cpus, allowed_idle_time_s): """ Test that for every window in which the tasks are running, :attr:`cpus` are not idle for more than :attr:`allowed_idle_time_s` """ res = ResultBundle.from_bool(True) - for task, lat_df in latency_dfs.items(): + for task, state_df in task_state_dfs.items(): # Have a look at every task activation - task_idle_times = [self._max_idle_time(index, index + row.t_delta, cpus) - for index, row in lat_df.iterrows()] + task_idle_times = [self._max_idle_time(index, index + row.delta, cpus) + for index, row in state_df.iterrows()] if not task_idle_times: continue @@ -322,22 +321,21 @@ class StaggeredFinishes(MisfitMigrationBase): first migration. """ - latency_dfs = {} + task_state_dfs = {} for task in self.rtapp_profile.keys(): - df = self.trace.analysis.latency.df_latency(task) - df = self._trim_lat_df(df[ - # Task is active - df.curr_state == "A" + df = self.trace.analysis.tasks.df_task_states(task) + df = self._trim_state_df(df[ + df.curr_state == TaskState.TASK_ACTIVE.char ]) # The first time the task runs on a big - first_big = df[df["__cpu"].isin(self.dst_cpus)].index[0] + first_big = df[df.cpu.isin(self.dst_cpus)].index[0] - df = df[df["__cpu"].isin(self.src_cpus)] + df = df[df.cpu.isin(self.src_cpus)] - latency_dfs[task] = df[:first_big] + task_state_dfs[task] = df[:first_big] - return self._test_cpus_busy(latency_dfs, self.dst_cpus, allowed_delay_s) + return self._test_cpus_busy(task_state_dfs, self.dst_cpus, allowed_delay_s) def test_throughput(self, allowed_idle_time_s=0.001) -> ResultBundle: """ @@ -350,16 +348,16 @@ class StaggeredFinishes(MisfitMigrationBase): pass. 
:type allowed_idle_time_s: float
         """
-        latency_dfs = {}
+        task_state_dfs = {}
         for task in self.rtapp_profile.keys():
             # This test is all about throughput: check that every time a task
             # runs on a little it's because bigs are busy
-            df = self.trace.analysis.latency.df_latency(task)
-            latency_dfs[task] = self._trim_lat_df(df[
+            df = self.trace.analysis.tasks.df_task_states(task)
+            task_state_dfs[task] = self._trim_state_df(df[
                 # Task is active
-                (df.curr_state == "A") &
+                (df.curr_state == TaskState.TASK_ACTIVE.char) &
                 # Task needs to be upmigrated
-                (df["__cpu"].isin(self.src_cpus))
+                (df.cpu.isin(self.src_cpus))
             ])
 
-        return self._test_cpus_busy(latency_dfs, self.dst_cpus, allowed_idle_time_s)
+        return self._test_cpus_busy(task_state_dfs, self.dst_cpus, allowed_idle_time_s)
-- 
GitLab


From 71f4f8d5ba304b703985553b19361bfaf93c6a56 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 18:40:21 +0000
Subject: [PATCH 51/56] analysis/base: Add a MissingTraceEventError exception class

---
 lisa/analysis/base.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py
index a0cafa01b..bbd738440 100644
--- a/lisa/analysis/base.py
+++ b/lisa/analysis/base.py
@@ -34,6 +34,17 @@ COLOR_CYCLES = [
 
 plt.rcParams['axes.prop_cycle'] = cycler(color=COLOR_CYCLES)
 
+class MissingTraceEventError(RuntimeError):
+    """
+    :param missing_events: The missing trace events
+    :type missing_events: list(str)
+    """
+    def __init__(self, missing_events):
+        super().__init__(
+            "Trace is missing the following required events: {}".format(missing_events))
+
+        self.missing_events = missing_events
+
 def requires_events(events):
     """
     Decorator for methods that require some given trace events
@@ -180,13 +191,12 @@ class AnalysisBase(Loggable):
         """
         Check that certain trace events are available in the trace
 
-        :raises: RuntimeError if some events are not available
+        :raises: MissingTraceEventError if some events are not available
         """
         available_events = sorted(set(self._trace.available_events))
         missing_events = sorted(set(required_events).difference(available_events))
         if missing_events:
-            raise RuntimeError(
-                "Trace is missing the following required events: {}".format(missing_events))
+            raise MissingTraceEventError(missing_events)
 
 # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80
-- 
GitLab


From fad6ed1117b432c77073d5fb6e840aebc16c3508 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 18:58:20 +0000
Subject: [PATCH 52/56] platforms/platinfo: Rectify platinfo key types

- CPU capacities are pure integer values, so ditch IntRealDict
- Add correct types for "freqs" & "freq-domains"
---
 lisa/platforms/platinfo.py | 13 ++++---------
 lisa/utils.py              |  9 ++++++---
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/lisa/platforms/platinfo.py b/lisa/platforms/platinfo.py
index 41a66322e..c61b745a1 100644
--- a/lisa/platforms/platinfo.py
+++ b/lisa/platforms/platinfo.py
@@ -15,18 +15,13 @@
 # limitations under the License.
 #
 
-import inspect
-import contextlib
-from collections import ChainMap
 from collections.abc import Mapping
-from numbers import Real
 
-from lisa.utils import HideExekallID, memoized, DeferredValue, IntRealDict, IntIntDict, StrIntListDict
+from lisa.utils import HideExekallID, memoized, DeferredValue, IntIntDict, IntListList, IntIntListDict, StrIntListDict
 from lisa.utils import MultiSrcConf, KeyDesc, LevelKeyDesc, TopLevelKeyDesc
 from lisa.energy_model import EnergyModel
 from lisa.wlgen.rta import RTA
-from trappy.stats.Topology import Topology
 
 from devlib.target import KernelVersion
 from devlib.exception import TargetStableError
@@ -55,14 +50,14 @@ class PlatformInfo(MultiSrcConf, HideExekallID):
         KeyDesc('calib', 'RTapp calibration dictionary', [IntIntDict]),
     )),
     KeyDesc('nrg-model', 'Energy model object', [EnergyModel]),
-    KeyDesc('cpu-capacities', 'Dictionary of CPU ID to capacity value', [IntRealDict]),
+    KeyDesc('cpu-capacities', 'Dictionary of CPU ID to capacity value', [IntIntDict]),
     KeyDesc('kernel-version', '', [KernelVersion]),
     KeyDesc('abi', 'ABI, e.g. "arm64"', [str]),
     KeyDesc('os', 'OS being used, e.g. "linux"', [str]),
     KeyDesc('name', 'Free-form name of the board', [str]),
     KeyDesc('cpus-count', 'Compat key: number of CPUs', [int]),
-    KeyDesc('freq-domains', 'Frequency domains', [list]),
-    KeyDesc('freqs', 'Dictionnary of CPU to list of frequencies', [dict]),
+    KeyDesc('freq-domains', 'Frequency domains', [IntListList]),
+    KeyDesc('freqs', 'Dictionary of CPU ID to list of frequencies', [IntIntListDict]),
 ))
 """Some keys have a reserved meaning with an associated type."""
 
diff --git a/lisa/utils.py b/lisa/utils.py
index 3b1f8fc9f..a79664923 100644
--- a/lisa/utils.py
+++ b/lisa/utils.py
@@ -1118,12 +1118,15 @@ class TypedList(GenericContainerBase, list, metaclass=GenericSequenceMeta):
 class IntIntDict(TypedDict):
     _type = (int, int)
 
-class IntRealDict(TypedDict):
-    _type = (int, numbers.Real)
-
 class IntList(TypedList):
     _type = int
 
+class IntIntListDict(TypedDict):
+    _type = (int, IntList)
+
+class IntListList(TypedList):
+    _type = IntList
+
 class StrList(TypedList):
     _type = str
-- 
GitLab


From 9d61cc3326ebf98b748097d8ea47458735dda775 Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 19:14:14 +0000
Subject: [PATCH 53/56] doc/analysis: Add status & thermal

---
 doc/analysis.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/doc/analysis.rst b/doc/analysis.rst
index 1760aa733..c7433d1d3 100644
--- a/doc/analysis.rst
+++ b/doc/analysis.rst
@@ -54,3 +54,15 @@ Latency
 
 .. automodule:: lisa.analysis.latency
    :members:
+
+Status
+======
+
+.. automodule:: lisa.analysis.status
+   :members:
+
+Thermal
+=======
+
+.. automodule:: lisa.analysis.thermal
+   :members:
-- 
GitLab


From 7ba70d373760f82629b9c6f74b8864c20f08b97c Mon Sep 17 00:00:00 2001
From: Valentin Schneider
Date: Fri, 30 Nov 2018 19:14:47 +0000
Subject: [PATCH 54/56] analysis/base: Sort events in requires_events()

---
 lisa/analysis/base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py
index bbd738440..1479594a9 100644
--- a/lisa/analysis/base.py
+++ b/lisa/analysis/base.py
@@ -56,14 +56,13 @@ def requires_events(events):
     """
     def decorator(f):
         @functools.wraps(f)
-
         def wrapper(self, *args, **kwargs):
             self.check_events(events)
             return f(self, *args, **kwargs)
 
         # Set an attribute on the wrapper itself, so it can be e.g.
added # to the method documentation - wrapper.required_events = events + wrapper.required_events = sorted(set(events)) return wrapper return decorator -- GitLab From adf19ffc91adb3807c111c0b4b4900ef70bddec7 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 30 Nov 2018 19:18:12 +0000 Subject: [PATCH 55/56] analysis: Cleanup imports --- lisa/analysis/base.py | 2 -- lisa/analysis/frequency.py | 5 +---- lisa/analysis/idle.py | 5 +---- lisa/analysis/latency.py | 12 +----------- 4 files changed, 3 insertions(+), 21 deletions(-) diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py index 1479594a9..975993c08 100644 --- a/lisa/analysis/base.py +++ b/lisa/analysis/base.py @@ -22,8 +22,6 @@ import inspect import matplotlib.pyplot as plt from cycler import cycler -from trappy.utils import listify - from lisa.utils import Loggable # Colorblind-friendly cycle, see https://gist.github.com/thriveth/8560036 diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py index 863f115b5..40e70f8c5 100644 --- a/lisa/analysis/frequency.py +++ b/lisa/analysis/frequency.py @@ -21,13 +21,10 @@ import os import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt +from matplotlib.ticker import FuncFormatter import pandas as pd import pylab as pl -from bart.common.Utils import area_under_curve -from matplotlib.ticker import FuncFormatter -from trappy.utils import listify - from lisa.analysis.base import AnalysisBase, requires_events from lisa.utils import memoized diff --git a/lisa/analysis/idle.py b/lisa/analysis/idle.py index d24059681..e467ccd5c 100644 --- a/lisa/analysis/idle.py +++ b/lisa/analysis/idle.py @@ -18,12 +18,9 @@ from functools import reduce import operator -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt import pandas as pd -import pylab as pl -from trappy.utils import listify, handle_duplicate_index +from trappy.utils import handle_duplicate_index from lisa.utils import memoized from lisa.analysis.base import AnalysisBase, requires_events diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py index ecd8f037d..70488a52b 100644 --- a/lisa/analysis/latency.py +++ b/lisa/analysis/latency.py @@ -15,19 +15,9 @@ # limitations under the License. # -import re -import os - -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt -import numpy as np import pandas as pd -import pylab as pl - -from trappy.utils import listify -from devlib.target import KernelVersion +import numpy as np -from collections import namedtuple from lisa.analysis.base import AnalysisBase, requires_events, COLOR_CYCLES from lisa.analysis.tasks import TaskState, TasksAnalysis from lisa.utils import memoized -- GitLab From 359499dd6456cdc5c19daf7d34fa0769e8e9b205 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 30 Nov 2018 19:27:45 +0000 Subject: [PATCH 56/56] analysis/latency: Remove CDF unbiasing voodoo incantation Turns out trusting strangers on the web is not foolproof. 
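
For reference, what is left behind after this change is the plain empirical
CDF: sort the samples, then map them onto evenly spaced probabilities in
[0, 1]. A minimal standalone sketch of that surviving computation ('data'
being any pandas Series of samples; the helper name is illustrative only):

    import numpy as np
    import pandas as pd

    def ecdf(data):
        # Sort the samples; each one becomes one step of the CDF
        ser = data.sort_values()
        # Evenly spaced probabilities, indexed by the sorted sample values
        return pd.Series(np.linspace(0., 1., len(ser)), index=ser)

    # e.g. ecdf(pd.Series([3.0, 1.0, 2.0])) gives values 0.0, 0.5, 1.0
    # indexed by 1.0, 2.0, 3.0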
--- lisa/analysis/latency.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py index 70488a52b..70f6252f5 100644 --- a/lisa/analysis/latency.py +++ b/lisa/analysis/latency.py @@ -226,11 +226,6 @@ class LatencyAnalysis(AnalysisBase): # Build the series of sorted values ser = data.sort_values() - if len(ser) < 1000: - # Unbias the CDF for small populations - # https://stackoverflow.com/a/31971245/5096023 - ser = ser.append(pd.Series(ser.iloc[-1])) - df = pd.Series(np.linspace(0., 1., len(ser)), index=ser) # Compute percentage of samples above/below the specified threshold -- GitLab