diff --git a/doc/analysis.rst b/doc/analysis.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c7433d1d3e51fa5556361a3f5fae293b5316fd1d
--- /dev/null
+++ b/doc/analysis.rst
@@ -0,0 +1,68 @@
+*********************
+Kernel trace analysis
+*********************
+
+Base class
+==========
+
+.. automodule:: lisa.analysis.base
+    :members:
+
+Load tracking
+=============
+
+.. automodule:: lisa.analysis.load_tracking
+    :members:
+
+CPUs
+====
+
+.. automodule:: lisa.analysis.cpus
+    :members:
+
+Frequency
+=========
+
+.. automodule:: lisa.analysis.frequency
+    :members:
+
+Tasks
+=====
+
+.. These two autoclasses should not be necessary, but sphinx doesn't seem
+   to like Enums and refuses to do anything with TaskState unless explicitly
+   told to.
+
+.. autoclass:: lisa.analysis.tasks.StateInt
+    :members:
+
+.. autoclass:: lisa.analysis.tasks.TaskState
+    :members:
+
+.. automodule:: lisa.analysis.tasks
+    :members:
+    :exclude-members: StateInt, TaskState
+
+Idle
+====
+
+.. automodule:: lisa.analysis.idle
+    :members:
+
+Latency
+=======
+
+.. automodule:: lisa.analysis.latency
+    :members:
+
+Status
+======
+
+.. automodule:: lisa.analysis.status
+    :members:
+
+Thermal
+=======
+
+.. automodule:: lisa.analysis.thermal
+    :members:
diff --git a/doc/conf.py b/doc/conf.py
index e2faab9bef3a15d51dab5e326a297d049e966812..fac1a1c3bd46fae12db1d1739d3ab348106d89f4 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -330,7 +330,7 @@ texinfo_documents = [
 intersphinx_mapping = {
     'python' : ('https://docs.python.org/3', None),
     'pandas' : ('https://pandas.pydata.org/pandas-docs/stable/', None),
-    'matplotlib' : ('http://matplotlib.sourceforge.net/', None),
+    'matplotlib' : ('https://matplotlib.org', None),
     # XXX: Doesn't seem to work, might be due to how devlib doc is generated
     'devlib' : ('https://pythonhosted.org/devlib/', None),
     'trappy' : ('https://pythonhosted.org/TRAPpy', None),
@@ -372,7 +372,7 @@ def is_test(method):
        for cls in base_cls_list
    )

-def autodoc_process_docstring(app, what, name, obj, options, lines):
+def autodoc_process_test_method(app, what, name, obj, options, lines):
    # Append the list of available test methods for all classes that appear to
    # have some.
    if what == 'class':
@@ -390,7 +390,20 @@ def autodoc_process_docstring(app, what, name, obj, options, lines):
         lines.extend(test_list_doc.splitlines())

+def autodoc_process_analysis_events(app, what, name, obj, options, lines):
+    # Append the list of required trace events
+    if what != 'method' or not hasattr(obj, "required_events"):
+        return
+
+    events = obj.required_events
+
+    events_doc = "\n:Required trace events:\n\n{}\n\n".format(
+        "\n".join([" * ``{}``".format(event) for event in events]))
+
+    lines.extend(events_doc.splitlines())
+
 def setup(app):
-    app.connect('autodoc-process-docstring', autodoc_process_docstring)
+    app.connect('autodoc-process-docstring', autodoc_process_test_method)
+    app.connect('autodoc-process-docstring', autodoc_process_analysis_events)

 # vim :set tabstop=4 shiftwidth=4 textwidth=80 expandtab
diff --git a/doc/index.rst b/doc/index.rst
index d76b9d6174dc516e3541155b4930ca0133153dc3..c3d0203fb30e7106d4f27fe1d6046f727d6cfd80 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -34,6 +34,7 @@ Contents:
    kernel_tests
    wlgen
    internals
+   analysis
    lisa_tests

diff --git a/lisa/analysis/base.py b/lisa/analysis/base.py
index fe222ff318d3b4144915a038cc483a831e527079..975993c082befb89805d1440b68c239d27da2c8f 100644
--- a/lisa/analysis/base.py
+++ b/lisa/analysis/base.py
@@ -15,168 +15,185 @@
 # limitations under the License.
 #

-import logging
-from collections import namedtuple
+import functools
+import os
+import inspect

-import matplotlib.gridspec as gridspec
 import matplotlib.pyplot as plt
-import pandas as pd
-import pylab as pl
+from cycler import cycler

-from trappy.utils import listify
+from lisa.utils import Loggable

-""" Helper module for Analysis classes """
+# Colorblind-friendly cycle, see https://gist.github.com/thriveth/8560036
+COLOR_CYCLES = [
+    '#377eb8', '#ff7f00', '#4daf4a',
+    '#f781bf', '#a65628', '#984ea3',
+    '#999999', '#e41a1c', '#dede00']

-ResidencyTime = namedtuple('ResidencyTime', ['total', 'active'])
-ResidencyData = namedtuple('ResidencyData', ['label', 'residency'])
+plt.rcParams['axes.prop_cycle'] = cycler(color=COLOR_CYCLES)

-class AnalysisBase:
+class MissingTraceEventError(RuntimeError):
+    """
+    :param missing_events: The missing trace events
+    :type missing_events: list(str)
+    """
+    def __init__(self, missing_events):
+        super().__init__(
+            "Trace is missing the following required events: {}".format(missing_events))
+
+        self.missing_events = missing_events
+
+def requires_events(events):
+    """
+    Decorator for methods that require some given trace events
+
+    :param events: The list of required events
+    :type events: list(str)
+
+    The decorated method must belong to a class that inherits from
+    :class:`AnalysisBase`
+    """
+    def decorator(f):
+        @functools.wraps(f)
+        def wrapper(self, *args, **kwargs):
+            self.check_events(events)
+            return f(self, *args, **kwargs)
+
+        # Set an attribute on the wrapper itself, so it can be e.g. added
+        # to the method documentation
+        wrapper.required_events = sorted(set(events))
+        return wrapper
+
+    return decorator
+
+class AnalysisBase(Loggable):
     """
     Base class for Analysis modules.

     :param trace: input Trace object
     :type trace: :class:`trace.Trace`
+
+    :Design notes:
+
+    Methods depending on certain trace events *must* be decorated with
+    :meth:`lisa.analysis.base.requires_events`
+
+    Plotting methods *must* return the :class:`matplotlib.axes.Axes` instance
+    used by the plotting method. This lets users further modify them.
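+
+    As a minimal sketch of both conventions (``MyAnalysis`` and its methods
+    are hypothetical examples, not part of :mod:`lisa.analysis`)::
+
+        class MyAnalysis(AnalysisBase):
+
+            name = 'my_analysis'
+
+            @requires_events(['sched_switch'])
+            def df_switches(self):
+                # check_events() is called by the decorator before this body
+                return self._trace.df_events('sched_switch')
+
+            def plot_switches(self, filepath=None):
+                fig, axis = self.setup_plot()
+                self.df_switches().plot(ax=axis)
+                self.save_plot(fig, filepath)
+                # Return the Axes so users can further customize the plot
+                return axis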
""" def __init__(self, trace): - self._log = logging.getLogger('Analysis') self._trace = trace - plat_info = self._trace.plat_info + @classmethod + def setup_plot(cls, width=16, height=4, ncols=1, nrows=1, **kwargs): + """ + Common helper for setting up a matplotlib plot + + :param width: Width of the plot (inches) + :type width: int or float - # By default assume SMP system - self._big_cap = 1024 - self._little_cap = 1024 - self._big_cpus = list(range(trace.cpus_count)) - self._little_cpus = [] + :param height: Height of each subplot (inches) + :type height: int or float - if self._trace.has_big_little: - nrg_model = plat_info['nrg-model'] - self._little_cap = nrg_model.get_cpu_capacity(nrg_model.littlest_cpus[0]) + :param ncols: Number of plots on a single row + :type ncols: int - if ('clusters' in plat_info and - 'big' in plat_info['clusters'] and - 'little' in plat_info['clusters']): - self._big_cpus = plat_info['clusters']['big'] - self._little_cpus = plat_info['clusters']['little'] + :param nrows: Number of plots in a single column + :type nrows: int - def _plot_setup(self, width=16, height=4, ncols=1, nrows=1): + :Keywords arguments: Extra arguments to pass to + :obj:`matplotlib.pyplot.subplots` + + :returns: tuple(matplotlib.figure.Figure, matplotlib.axes.Axes (or an + array of, if ``nrows`` > 1)) + """ figure, axes = plt.subplots( - ncols=ncols, nrows=nrows, figsize=(width, height * nrows) + ncols=ncols, nrows=nrows, figsize=(width, height * nrows), **kwargs ) # Needed for multirow plots to not overlap with each other plt.tight_layout(h_pad=3.5) return figure, axes - def _plot_generic(self, dfr, pivot, filters=None, columns=None, - prettify_name=None, width=16, height=4, - drawstyle="default", ax=None, title=""): + @classmethod + def cycle_colors(cls, axis, nr_cycles): """ - Generic trace plotting method + Cycle the axis color cycle ``nr_cycles`` forward + + :param axis: The axis to manipulate + :type axis: matplotlib.axes.Axes + + :param nr_cycles: The number of colors to cycle through. + :type nr_cycles: int - The values in the column 'pivot' will be used as discriminant + .. note:: - Let's consider a df with these columns: + This is an absolute cycle, as in, it will always start from the first + color defined in the color cycle. - | time | cpu | load_avg | util_avg | - ==================================== - | 42 | 2 | 1812 | 400 | - ------------------------------------ - | 43 | 0 | 1337 | 290 | - ------------------------------------ - | .. | ... | .. | .. | + """ + if nr_cycles < 1: + return - To plot the 'util_avg' value of CPU2, the function would be used like so: - :: - plot_generic(df, pivot='cpu', filters={'cpu' : [2]}, columns='util_avg') + colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] - CPUs could be compared by using: - :: - plot_generic(df, pivot='cpu', filters={'cpu' : [2, 3]}, columns='util_avg') + if nr_cycles > len(colors): + nr_cycles -= len(colors) - :param dfr: Trace dataframe - :type dfr: `pandas.DataFrame` + axis.set_prop_cycle(cycler(color=colors[nr_cycles:] + colors[:nr_cycles])) - :param pivot: Name of column that will serve as a pivot - :type pivot: str + @classmethod + def get_next_color(cls, axis): + """ + Get the next color that will be used to draw lines on the axis - :param filters: Dataframe column filters - :type filters: dict + :param axis: The axis + :type axis: matplotlib.axes.Axes - :param columns: Name of columns whose data will be plotted - :type columns: str or list(str) + .. 
+        .. warning::

-        :param prettify_name: user-friendly stringify function for pivot values
-        :type prettify_name: callable[str]
+          This will consume the color from the cycler, which means it will
+          change which color is to be used next.

-        :param width: The width of the plot
-        :type width: int
+        """
+        # XXX: We're accessing some private data here, so that could break eventually.
+        # Need to find another way to get the current color from the cycler, or to
+        # plot all data from a dataframe in the same color.
+        return next(axis._get_lines.prop_cycler)['color']

-        :param height: The height of the plot
-        :type height: int
+    def save_plot(self, figure, filepath=None, img_format="png"):
+        """
+        Save the plot stored in the ``figure``
+
+        :param figure: The plot figure
+        :type figure: matplotlib.figure.Figure
+
+        :param filepath: The path of the file into which the plot will be saved.
+          If ``None``, a path based on the trace directory and the calling method
+          will be used.
+        :type filepath: str
+
+        :param img_format: The image format to generate
+        :type img_format: str
+        """
+        if filepath is None:
+            module = self.__module__
+            caller = inspect.stack()[1][3]
+            filepath = os.path.join(
+                self._trace.plots_dir,
+                "{}.{}.{}".format(module, caller, img_format))
+
+        figure.savefig(filepath, format=img_format)
+
+    def check_events(self, required_events):
+        """
+        Check that certain trace events are available in the trace

-        :param drawstyle: The drawstyle setting of the plot
-        :type drawstyle: str
+        :raises: MissingTraceEventError if some events are not available
         """
+        available_events = sorted(set(self._trace.available_events))
+        missing_events = sorted(set(required_events).difference(available_events))

-        if prettify_name is None:
-            def prettify_name(name): return '{}={}'.format(pivot, name)
-
-        if pivot not in dfr.columns:
-            raise ValueError('Invalid "pivot" parameter value: no {} column'
-                             .format(pivot)
-            )
-
-        if columns is None:
-            # Find available columns
-            columns = dfr.columns.tolist()
-            columns.remove(pivot)
-        else:
-            # Filter out unwanted columns
-            columns = listify(columns)
-            try:
-                dfr = dfr[columns + [pivot]]
-            except KeyError as err:
-                raise ValueError('Invalid "columns" parameter value: {}'
-                                 .format(err.message)
-                )
-
-        # Apply filters
-        if filters is None:
-            filters = {}
-
-        for col, vals in filters.items():
-            dfr = dfr[dfr[col].isin(vals)]
-
-        setup_plot = False
-        if ax is None:
-            _, ax = self._plot_setup(width, height)
-            setup_plot = True
-
-        matches = dfr[pivot].unique().tolist()
-
-        for match in matches:
-            renamed_cols = []
-            for col in columns:
-                renamed_cols.append('{} {}'.format(prettify_name(match), col))
-
-            plot_dfr = dfr[dfr[pivot] == match][columns]
-            plot_dfr.columns = renamed_cols
-            plot_dfr.plot(ax=ax, drawstyle=drawstyle)
-
-        if setup_plot:
-            ax.set_title(title)
-
-        ax.set_xlim(self._trace.x_min, self._trace.x_max)
-
-        # Extend ylim for better visibility
-        cur_lim = ax.get_ylim()
-        lim = (cur_lim[0] - 0.1 * (cur_lim[1] - cur_lim[0]),
-               cur_lim[1] + 0.1 * (cur_lim[1] - cur_lim[0]))
-        ax.set_ylim(lim)
-
-        plt.legend()
-
-        return ax
+        if missing_events:
+            raise MissingTraceEventError(missing_events)

 # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80
diff --git a/lisa/analysis/cpus.py b/lisa/analysis/cpus.py
index 1228cc23c4b6f4cdc7815e78c55f636b47ce127f..474730630bce06fe4446b3443937443463eab396 100644
--- a/lisa/analysis/cpus.py
+++ b/lisa/analysis/cpus.py
@@ -17,19 +17,15 @@

 """ CPUs Analysis Module """

-import matplotlib.pyplot as plt
-import pylab as pl
 import pandas as pd

-from lisa.analysis.base import AnalysisBase
+from lisa.utils import memoized
+from lisa.analysis.base import AnalysisBase, requires_events


 class CpusAnalysis(AnalysisBase):
     """
-    Support for CPUs Signals Analysis
-
-    :param trace: input Trace object
-    :type trace: :class:`Trace`
+    Support for CPUs signals analysis
     """

     name = 'cpus'

@@ -42,175 +38,58 @@ class CpusAnalysis(AnalysisBase):
# DataFrame Getter Methods
###############################################################################

+    @requires_events(['sched_switch'])
     def df_context_switches(self):
         """
         Compute number of context switches on each CPU.

-        :returns: :mod:`pandas.DataFrame`
-        """
-        if not self._trace.hasEvents('sched_switch'):
-            self._log.warning('Events [sched_switch] not found, context switch '
-                              'computation not possible!')
-            return None
+        :returns: A :class:`pandas.DataFrame` with:

+          * A ``context_switch_cnt`` column (the number of context switches per CPU)
+        """
         sched_df = self._trace.df_events('sched_switch')
-        cpus = list(range(self._trace.plat_info['cpus-count']))
+        cpus = list(range(self._trace.cpus_count))
         ctx_sw_df = pd.DataFrame(
             [len(sched_df[sched_df['__cpu'] == cpu]) for cpu in cpus],
             index=cpus,
             columns=['context_switch_cnt']
         )
         ctx_sw_df.index.name = 'cpu'
-        return ctx_sw_df

-    def df_cpu_wakeups(self, cpus=None):
-        """"
-        Get a DataFrame showing when a CPU was woken from idle
-
-        :param cpus: List of CPUs to find wakeups for. If None, all CPUs.
-        :type cpus: list(int) or None
-
-        :returns: :mod:`pandas.DataFrame` with one column ``cpu``, where each
-                  row shows a time when the given ``cpu`` was woken up from
-                  idle.
-        """
-        if not self._trace.hasEvents('cpu_idle'):
-            self._log.warning('Events [cpu_idle] not found, cannot '
-                              'get CPU wakeup events.')
-            return None
-
-        cpus = cpus or list(range(self._trace.cpus_count))
-
-        sr = pd.Series()
-        for cpu in cpus:
-            cpu_sr = self._trace.getCPUActiveSignal(cpu)
-            cpu_sr = cpu_sr[cpu_sr == 1]
-            cpu_sr = cpu_sr.replace(1, cpu)
-            sr = sr.append(cpu_sr)
-
-        return pd.DataFrame({'cpu': sr}).sort_index()
+        return ctx_sw_df

###############################################################################
# Plotting Methods
###############################################################################

-    def plot_cpu(self, cpus=None):
+    @requires_events(df_context_switches.required_events)
+    def plot_context_switches(self, filepath=None):
         """
-        Plot CPU-related signals for both big and LITTLE clusters.
-
-        :param cpus: list of CPUs to be plotted
-        :type cpus: list(int)
+        Plot histogram of context switches on each CPU.
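+
+        For example, assuming ``trace`` is a parsed :class:`Trace` exposing
+        this module as ``trace.analysis.cpus``::
+
+            axis = trace.analysis.cpus.plot_context_switches()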
""" - if not self._trace.hasEvents('sched_load_avg_cpu'): - self._log.warning('Events [sched_load_avg_cpu] not found, ' - 'plot DISABLED!') - return - - # Filter on specified cpus - if cpus is None: - cpus = sorted(self._big_cpus + self._little_cpus) - - # Plot: big CPUs - bcpus = set(cpus).intersection(self._big_cpus) - if bcpus: - self._plot_cpu(bcpus, "big") - - # Plot: LITTLE CPUs - lcpus = set(cpus).intersection(self._little_cpus) - if lcpus: - self._plot_cpu(lcpus, "LITTLE") + fig, axis = self.setup_plot(height=8) + ctx_sw_df = self.df_context_switches() + ctx_sw_df["context_switch_cnt"].plot.bar( + title="Per-CPU Task Context Switches", legend=False, ax=axis) + axis.grid() -############################################################################### -# Utility Methods -############################################################################### + self.save_plot(fig, filepath) + return axis - def _plot_cpu(self, cpus, label=''): + def plot_orig_capacity(self, axis, cpu): """ - Internal method that generates plots for all input CPUs. + Plot the orig capacity of a CPU onto a given axis - :param cpus: list of CPUs to be plotted - :type cpus: list(int) - """ - if label != '': - label1 = '{} '.format(label) - label2 = '_{}s'.format(label.lower()) - - # Plot required CPUs - _, pltaxes = plt.subplots(len(cpus), 1, figsize=(16, 3*(len(cpus)))) - - idx = 0 - for cpu in cpus: - - # Reference axes to be used - axes = pltaxes - if len(cpus) > 1: - axes = pltaxes[idx] - - # Add CPU utilization - axes.set_title('{0:s}CPU [{1:d}]'.format(label1, cpu)) - df = self._trace.df_events('sched_load_avg_cpu') - df = df[df.cpu == cpu] - if len(df): - df[['util_avg']].plot(ax=axes, drawstyle='steps-post', - alpha=0.4) - - # if self._trace.hasEvents('sched_boost_cpu'): - # df = self._trace.df_events('sched_boost_cpu') - # df = df[df.cpu == cpu] - # if len(df): - # df[['usage', 'boosted_usage']].plot( - # ax=axes, - # style=['m-', 'r-'], - # drawstyle='steps-post'); - - # Add Capacities data if avilable - if self._trace.hasEvents('cpu_capacity'): - df = self._trace.df_events('cpu_capacity') - df = df[df.cpu == cpu] - if len(df): - # data = df[['capacity', 'tip_capacity', 'max_capacity']] - # data.plot(ax=axes, style=['m', 'y', 'r'], - data = df[['capacity', 'tip_capacity']] - data.plot(ax=axes, style=['m', '--y'], - drawstyle='steps-post') - - # Add overutilized signal to the plot - self._trace.analysis.status.plot_overutilized(axes) - - axes.set_ylim(0, 1100) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - - if idx == 0: - axes.annotate("{}CPUs Signals".format(label1), - xy=(0, axes.get_ylim()[1]), - xytext=(-50, 25), - textcoords='offset points', fontsize=16) - # Disable x-axis timestamp for top-most cpus - if len(cpus) > 1 and idx < len(cpus)-1: - axes.set_xticklabels([]) - axes.set_xlabel('') - axes.grid(True) - - idx += 1 - - # Save generated plots into datadir - figname = '{}/{}cpus{}.png'.format(self._trace.plots_dir, - self._trace.plots_prefix, label2) - pl.savefig(figname, bbox_inches='tight') - - def plot_context_switch(self): - """ - Plot histogram of context switches on each CPU. 
- """ - if not self._trace.hasEvents('sched_switch'): - self._log.warning('Events [sched_switch] not found, plot DISABLED!') - return + :param axis: The axis + :type axis: matplotlib.axes.Axes - ctx_sw_df = self.df_context_switches() - ax = ctx_sw_df.plot.bar(title="Per-CPU Task Context Switches", - legend=False, - figsize=(16, 8)) - ax.grid() + :param cpu: The CPU + :type cpu: int + """ + if "cpu-capacities" in self._trace.plat_info: + axis.axhline(self._trace.plat_info["cpu-capacities"][cpu], + color=self.get_next_color(axis), + linestyle='--', label="orig_capacity") # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/lisa/analysis/eas.py b/lisa/analysis/eas.py deleted file mode 100644 index 675d10070fbaf148ab2aa0236278da37fa93939c..0000000000000000000000000000000000000000 --- a/lisa/analysis/eas.py +++ /dev/null @@ -1,403 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# -# Copyright (C) 2015, ARM Limited and contributors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -""" EAS-specific Analysis Module """ - -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt -import pylab as pl - -from lisa.analysis.base import AnalysisBase - - -class EasAnalysis(AnalysisBase): - """ - Support for EAS signals anaysis - - :param trace: input Trace object - :type trace: :class:`trace.Trace` - """ - - name = 'eas' - - def __init__(self, trace): - super(EasAnalysis, self).__init__(trace) - -############################################################################### -# DataFrame Getter Methods -############################################################################### - - -############################################################################### -# Plotting Methods -############################################################################### - - def plot_e_diff_time(self, tasks=None, - min_usage_delta=None, max_usage_delta=None, - min_cap_delta=None, max_cap_delta=None, - min_nrg_delta=None, max_nrg_delta=None, - min_nrg_diff=None, max_nrg_diff=None): - """ - Plot energy_diff()-related signals on time axes. 
- """ - if not self._trace.hasEvents('sched_energy_diff'): - self._log.warning('Event [sched_energy_diff] not found, plot DISABLED!') - return - df = self._trace.df_events('sched_energy_diff') - - # Filter on 'tasks' - if tasks is not None: - self._log.info('Plotting EDiff data just for task(s) [%s]', tasks) - df = df[df['comm'].isin(tasks)] - - # Filter on 'usage_delta' - if min_usage_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' - 'usage_delta of [%d]', min_usage_delta) - df = df[abs(df['usage_delta']) >= min_usage_delta] - if max_usage_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' - 'usage_delta of [%d]', max_usage_delta) - df = df[abs(df['usage_delta']) <= max_usage_delta] - - # Filter on 'cap_delta' - if min_cap_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' - 'cap_delta of [%d]', min_cap_delta) - df = df[abs(df['cap_delta']) >= min_cap_delta] - if max_cap_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' - 'cap_delta of [%d]', max_cap_delta) - df = df[abs(df['cap_delta']) <= max_cap_delta] - - # Filter on 'nrg_delta' - if min_nrg_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' - 'nrg_delta of [%d]', min_nrg_delta) - df = df[abs(df['nrg_delta']) >= min_nrg_delta] - if max_nrg_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' - 'nrg_delta of [%d]', max_nrg_delta) - df = df[abs(df['nrg_delta']) <= max_nrg_delta] - - # Filter on 'nrg_diff' - if min_nrg_diff is not None: - self._log.info('Plotting EDiff data just with minimum ' - 'nrg_diff of [%d]', min_nrg_diff) - df = df[abs(df['nrg_diff']) >= min_nrg_diff] - if max_nrg_diff is not None: - self._log.info('Plotting EDiff data just with maximum ' - 'nrg_diff of [%d]', max_nrg_diff) - df = df[abs(df['nrg_diff']) <= max_nrg_diff] - - # Grid: setup stats for gris - gs = gridspec.GridSpec(4, 3, height_ratios=[2, 4, 2, 4]) - gs.update(wspace=0.1, hspace=0.1) - - # Configure plot - fig = plt.figure(figsize=(16, 8*2+4*2+2)) - plt.suptitle("EnergyDiff Data", - y=.92, fontsize=16, horizontalalignment='center') - - # Plot1: src and dst CPUs - axes = plt.subplot(gs[0, :]) - axes.set_title('Source and Destination CPUs') - df[['src_cpu', 'dst_cpu']].plot(ax=axes, style=['bo', 'r+']) - axes.set_ylim(-1, self._trace.plat_info['cpus-count']+1) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.grid(True) - axes.set_xticklabels([]) - axes.set_xlabel('') - self._trace.analysis.status.plot_overutilized(axes) - - # Plot2: energy and capacity variations - axes = plt.subplot(gs[1, :]) - axes.set_title('Energy vs Capacity Variations') - - colors_labels = list(zip('gbyr', ['Optimal Accept', 'SchedTune Accept', - 'SchedTune Reject', 'Suboptimal Reject'])) - for color, label in colors_labels: - subset = df[df.nrg_payoff_group == label] - if len(subset) == 0: - continue - subset[['nrg_diff_pct']].plot(ax=axes, style=[color+'o']) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.set_yscale('symlog') - axes.grid(True) - axes.set_xticklabels([]) - axes.set_xlabel('') - self._trace.analysis.status.plot_overutilized(axes) - - # Plot3: energy payoff - axes = plt.subplot(gs[2, :]) - axes.set_title('Energy Payoff Values') - for color, label in colors_labels: - subset = df[df.nrg_payoff_group == label] - if len(subset) == 0: - continue - subset[['nrg_payoff']].plot(ax=axes, style=[color+'o']) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.set_yscale('symlog') - 
axes.grid(True) - axes.set_xticklabels([]) - axes.set_xlabel('') - self._trace.analysis.status.plot_overutilized(axes) - - # Plot4: energy deltas (kernel and host computed values) - axes = plt.subplot(gs[3, :]) - axes.set_title('Energy Deltas Values') - df[['nrg_delta', 'nrg_diff_pct']].plot(ax=axes, style=['ro', 'b+']) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.grid(True) - self._trace.analysis.status.plot_overutilized(axes) - - # Save generated plots into datadir - figname = '{}/{}ediff_time.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix) - pl.savefig(figname, bbox_inches='tight') - - # Grid: setup stats for gris - gs = gridspec.GridSpec(1, 3, height_ratios=[2]) - gs.update(wspace=0.1, hspace=0.1) - - fig = plt.figure(figsize=(16, 4)) - - # Plot: usage, capacity and energy distributuions - axes = plt.subplot(gs[0, 0]) - df[['usage_delta']].hist(ax=axes, bins=60) - axes = plt.subplot(gs[0, 1]) - df[['cap_delta']].hist(ax=axes, bins=60) - axes = plt.subplot(gs[0, 2]) - df[['nrg_delta']].hist(ax=axes, bins=60) - - # Save generated plots into datadir - figname = '{}/{}ediff_stats.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix) - pl.savefig(figname, bbox_inches='tight') - - def plot_e_diff_space(self, tasks=None, - min_usage_delta=None, max_usage_delta=None, - min_cap_delta=None, max_cap_delta=None, - min_nrg_delta=None, max_nrg_delta=None, - min_nrg_diff=None, max_nrg_diff=None): - """ - Plot energy_diff()-related signals on the Performance-Energy space - (PxE). - """ - if not self._trace.hasEvents('sched_energy_diff'): - self._log.warning('Event [sched_energy_diff] not found, plot DISABLED!') - return - df = self._trace.df_events('sched_energy_diff') - - # Filter on 'tasks' - if tasks is not None: - self._log.info('Plotting EDiff data just for task(s) [%s]', tasks) - df = df[df['comm'].isin(tasks)] - - # Filter on 'usage_delta' - if min_usage_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' - 'usage_delta of [%d]', min_usage_delta) - df = df[abs(df['usage_delta']) >= min_usage_delta] - if max_usage_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' - 'usage_delta of [%d]', max_usage_delta) - df = df[abs(df['usage_delta']) <= max_usage_delta] - - # Filter on 'cap_delta' - if min_cap_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' - 'cap_delta of [%d]', min_cap_delta) - df = df[abs(df['cap_delta']) >= min_cap_delta] - if max_cap_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' - 'cap_delta of [%d]', max_cap_delta) - df = df[abs(df['cap_delta']) <= max_cap_delta] - - # Filter on 'nrg_delta' - if min_nrg_delta is not None: - self._log.info('Plotting EDiff data just with minimum ' - 'nrg_delta of [%d]', min_nrg_delta) - df = df[abs(df['nrg_delta']) >= min_nrg_delta] - if max_nrg_delta is not None: - self._log.info('Plotting EDiff data just with maximum ' - 'nrg_delta of [%d]', max_nrg_delta) - df = df[abs(df['nrg_delta']) <= max_nrg_delta] - - # Filter on 'nrg_diff' - if min_nrg_diff is not None: - self._log.info('Plotting EDiff data just with minimum ' - 'nrg_diff of [%d]', min_nrg_diff) - df = df[abs(df['nrg_diff']) >= min_nrg_diff] - if max_nrg_diff is not None: - self._log.info('Plotting EDiff data just with maximum ' - 'nrg_diff of [%d]', max_nrg_diff) - df = df[abs(df['nrg_diff']) <= max_nrg_diff] - - # Grid: setup grid for P-E space - gs = gridspec.GridSpec(1, 2, height_ratios=[2]) - gs.update(wspace=0.1, hspace=0.1) - - fig = 
plt.figure(figsize=(16, 8)) - - # Get min-max of each axes - x_min = df.nrg_diff_pct.min() - x_max = df.nrg_diff_pct.max() - y_min = df.cap_delta.min() - y_max = df.cap_delta.max() - axes_min = min(x_min, y_min) - axes_max = max(x_max, y_max) - - # # Tag columns by usage_delta - # ccol = df.usage_delta - # df['usage_delta_group'] = np.select( - # [ccol < 150, ccol < 400, ccol < 600], - # ['< 150', '< 400', '< 600'], '>= 600') - # - # # Tag columns by nrg_payoff - # ccol = df.nrg_payoff - # df['nrg_payoff_group'] = np.select( - # [ccol > 2e9, ccol > 0, ccol > -2e9], - # ['Optimal Accept', 'SchedTune Accept', 'SchedTune Reject'], - # 'Suboptimal Reject') - - # Plot: per usage_delta values - axes = plt.subplot(gs[0, 0]) - - for color, label in zip('bgyr', ['< 150', '< 400', '< 600', '>= 600']): - subset = df[df.usage_delta_group == label] - if len(subset) == 0: - continue - plt.scatter(subset.nrg_diff_pct, subset.cap_delta, - s=subset.usage_delta, - c=color, label='task_usage ' + str(label), - axes=axes) - - # Plot space axes - plt.plot((0, 0), (-1025, 1025), 'y--', axes=axes) - plt.plot((-1025, 1025), (0, 0), 'y--', axes=axes) - - # # Perf cuts - # plt.plot((0, 100), (0, 100*delta_pb), 'b--', - # label='PB (Perf Boost)') - # plt.plot((0, -100), (0, -100*delta_pc), 'r--', - # label='PC (Perf Constraint)') - # - # # Perf boost setups - # for y in range(0,6): - # plt.plot((0, 500), (0,y*100), 'g:') - # for x in range(0,5): - # plt.plot((0, x*100), (0,500), 'g:') - - axes.legend(loc=4, borderpad=1) - - plt.xlim(1.1*axes_min, 1.1*axes_max) - plt.ylim(1.1*axes_min, 1.1*axes_max) - - # axes.title('Performance-Energy Space') - axes.set_xlabel('Energy diff [%]') - axes.set_ylabel('Capacity diff [%]') - - # Plot: per usage_delta values - axes = plt.subplot(gs[0, 1]) - - colors_labels = list(zip('gbyr', ['Optimal Accept', 'SchedTune Accept', - 'SchedTune Reject', 'Suboptimal Reject'])) - for color, label in colors_labels: - subset = df[df.nrg_payoff_group == label] - if len(subset) == 0: - continue - plt.scatter(subset.nrg_diff_pct, subset.cap_delta, - s=60, - c=color, - marker='+', - label='{} Region'.format(label), - axes=axes) - # s=subset.usage_delta, - - # Plot space axes - plt.plot((0, 0), (-1025, 1025), 'y--', axes=axes) - plt.plot((-1025, 1025), (0, 0), 'y--', axes=axes) - - # # Perf cuts - # plt.plot((0, 100), (0, 100*delta_pb), 'b--', - # label='PB (Perf Boost)') - # plt.plot((0, -100), (0, -100*delta_pc), 'r--', - # label='PC (Perf Constraint)') - # - # # Perf boost setups - # for y in range(0,6): - # plt.plot((0, 500), (0,y*100), 'g:') - # for x in range(0,5): - # plt.plot((0, x*100), (0,500), 'g:') - - axes.legend(loc=4, borderpad=1) - - plt.xlim(1.1*axes_min, 1.1*axes_max) - plt.ylim(1.1*axes_min, 1.1*axes_max) - - # axes.title('Performance-Energy Space') - axes.set_xlabel('Energy diff [%]') - axes.set_ylabel('Capacity diff [%]') - - plt.title('Performance-Energy Space') - - # Save generated plots into datadir - figname = '{}/{}ediff_space.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix) - pl.savefig(figname, bbox_inches='tight') - - def plot_sched_tune_conf(self): - """ - Plot the configuration of SchedTune. 
- """ - if not self._trace.hasEvents('sched_tune_config'): - self._log.warning('Event [sched_tune_config] not found, plot DISABLED!') - return - # Grid - gs = gridspec.GridSpec(2, 1, height_ratios=[4, 1]) - gs.update(wspace=0.1, hspace=0.1) - - # Figure - plt.figure(figsize=(16, 2*6)) - plt.suptitle("SchedTune Configuration", - y=.97, fontsize=16, horizontalalignment='center') - - # Plot: Margin - axes = plt.subplot(gs[0, 0]) - axes.set_title('Margin') - data = self._trace.df_events('sched_tune_config')[['margin']] - data.plot(ax=axes, drawstyle='steps-post', style=['b']) - axes.set_ylim(0, 110) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.xaxis.set_visible(False) - - # Plot: Boost mode - axes = plt.subplot(gs[1, 0]) - axes.set_title('Boost mode') - data = self._trace.df_events('sched_tune_config')[['boostmode']] - data.plot(ax=axes, drawstyle='steps-post') - axes.set_ylim(0, 4) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.xaxis.set_visible(True) - - # Save generated plots into datadir - figname = '{}/{}schedtune_conf.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix) - pl.savefig(figname, bbox_inches='tight') - -# vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/lisa/analysis/frequency.py b/lisa/analysis/frequency.py index fb39d571bca96861b2466d1088b41109f3daf1c7..40e70f8c5fafd32fa78f4c2c246d74039d35def8 100644 --- a/lisa/analysis/frequency.py +++ b/lisa/analysis/frequency.py @@ -17,18 +17,16 @@ """ Frequency Analysis Module """ +import os + import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt -import operator -import os +from matplotlib.ticker import FuncFormatter import pandas as pd import pylab as pl -from lisa.analysis.base import AnalysisBase, ResidencyTime, ResidencyData +from lisa.analysis.base import AnalysisBase, requires_events from lisa.utils import memoized -from bart.common.Utils import area_under_curve -from matplotlib.ticker import FuncFormatter -from trappy.utils import listify class FrequencyAnalysis(AnalysisBase): """ @@ -43,11 +41,66 @@ class FrequencyAnalysis(AnalysisBase): def __init__(self, trace): super(FrequencyAnalysis, self).__init__(trace) -############################################################################### -# DataFrame Getter Methods -############################################################################### + @memoized + @requires_events(['cpu_frequency', 'cpu_idle']) + def _get_frequency_residency(self, cpus): + """ + Get a DataFrame with per cluster frequency residency, i.e. amount of + time spent at a given frequency in each cluster. + + :param cpus: A tuple of CPU IDs + :type cpus: tuple(int) + + :returns: A :class:`pandas.DataFrame` with: + + * A ``total_time`` column (the total time spent at a frequency) + * A ``active_time`` column (the non-idle time spent at a frequency) + """ + freq_df = self._trace.df_events('cpu_frequency') + # Assumption: all CPUs in a cluster run at the same frequency, i.e. the + # frequency is scaled per-cluster not per-CPU. Hence, we can limit the + # cluster frequencies data to a single CPU. This assumption is verified + # by the Trace module when parsing the trace. 
+        if len(cpus) > 1 and not self._trace.freq_coherency:
+            self.get_logger().warning('Cluster frequency is NOT coherent, '
+                                      'cannot compute residency!')
+            return None
+
+        cluster_freqs = freq_df[freq_df.cpu == cpus[0]]
+
+        # Compute TOTAL Time
+        cluster_freqs = self._trace.add_events_deltas(
+            cluster_freqs, col_name="total_time", inplace=False)
+        time_df = cluster_freqs[["total_time", "frequency"]].groupby(["frequency"]).sum()

-    def df_cpu_frequency_residency(self, cpu, total=True):
+        # Compute ACTIVE Time
+        cluster_active = self._trace.analysis.idle.signal_cluster_active(cpus)
+
+        # In order to compute the active time spent at each frequency we
+        # multiply 2 square waves:
+        # - cluster_active, a square wave of the form:
+        #     cluster_active[t] == 1 if at least one CPU is reported to be
+        #                            non-idle by CPUFreq at time t
+        #     cluster_active[t] == 0 otherwise
+        # - freq_active, square wave of the form:
+        #     freq_active[t] == 1 if at time t the frequency is f
+        #     freq_active[t] == 0 otherwise
+        available_freqs = sorted(cluster_freqs.frequency.unique())
+        cluster_freqs = cluster_freqs.join(
+            cluster_active.to_frame(name='active'), how='outer')
+        cluster_freqs.fillna(method='ffill', inplace=True)
+        nonidle_time = []
+        for freq in available_freqs:
+            freq_active = cluster_freqs.frequency.apply(lambda x: 1 if x == freq else 0)
+            active_t = cluster_freqs.active * freq_active
+            # Compute total time by integrating the square wave
+            nonidle_time.append(self._trace.integrate_square_wave(active_t))
+
+        time_df["active_time"] = pd.DataFrame(index=available_freqs, data=nonidle_time)
+        return time_df
+
+    @requires_events(_get_frequency_residency.required_events)
+    def df_cpu_frequency_residency(self, cpu):
         """
         Get per-CPU frequency residency, i.e. amount of time CPU `cpu`
         spent at each frequency.
@@ -55,78 +108,47 @@ class FrequencyAnalysis(AnalysisBase):
         :param cpu: CPU ID
         :type cpu: int

-        :param total: if true returns the "total" time, otherwise the "active"
-                      time is returned
-        :type total: bool
-
-        :returns: :mod:`pandas.DataFrame` - "total" or "active" time residency
-                  at each frequency.
+        :returns: A :class:`pandas.DataFrame` with:

-        :raises: TypeError
+          * A ``total_time`` column (the total time spent at a frequency)
+          * An ``active_time`` column (the non-idle time spent at a frequency)
         """
         if not isinstance(cpu, int):
             raise TypeError('Input CPU parameter must be an integer')

-        residency = self._get_frequency_residency(cpu)
-        if not residency:
-            return None
-        if total:
-            return residency.total
-        return residency.active
+        return self._get_frequency_residency((cpu,))

-    def df_cluster_frequency_residency(self, cluster, total=True):
+    @requires_events(_get_frequency_residency.required_events)
+    def df_domain_frequency_residency(self, cpu):
         """
-        Get per-Cluster frequency residency, i.e. amount of time CLUSTER
-        `cluster` spent at each frequency.
-
-        :param cluster: this can be either a list of CPU IDs belonging to a
-            cluster or the cluster name as specified in the platform
-            description
-        :type cluster: str or list(int)
+        Get per-frequency-domain frequency residency, i.e. amount of time each
+        domain spent at each frequency.

-        :param total: if true returns the "total" time, otherwise the "active"
-                      time is returned
-        :type total: bool
+        :param cpu: Any CPU of the domain to analyse
+        :type cpu: int

-        :returns: :mod:`pandas.DataFrame` - "total" or "active" time residency
-                  at each frequency.
+        :returns: A :class:`pandas.DataFrame` with:

-        :raises: KeyError
+          * A ``total_time`` column (the total time spent at a frequency)
+          * An ``active_time`` column (the non-idle time spent at a frequency)
         """
-        if isinstance(cluster, str):
-            try:
-                residency = self._get_frequency_residency(
-                    self._trace.plat_info['clusters'][cluster.lower()]
-                )
-            except KeyError:
-                self._log.warning(
-                    'Platform descriptor has not a cluster named [%s], '
-                    'plot disabled!', cluster
-                )
-                return None
-        else:
-            residency = self._get_frequency_residency(cluster)
-        if not residency:
-            return None
-        if total:
-            return residency.total
-        return residency.active
+        domains = self._trace.plat_info['freq-domains']
+        for domain in domains:
+            if cpu in domain:
+                return self._get_frequency_residency(tuple(domain))

+    @requires_events(['cpu_frequency'])
     def df_cpu_frequency_transitions(self, cpu):
         """
         Compute number of frequency transitions of a given CPU.

-        Requires cpu_frequency events to be available in the trace.
-
         :param cpu: a CPU ID
         :type cpu: int

-        :returns: :mod:`pandas.DataFrame` - number of frequency transitions
+        :returns: A :class:`pandas.DataFrame` with:
+
+          * A ``transitions`` column (the number of frequency transitions)
         """
-        if not self._trace.hasEvents('cpu_frequency'):
-            self._log.warn('Events [cpu_frequency] not found, '
-                           'frequency data not available')
-            return None

         freq_df = self._trace.df_events('cpu_frequency')
         cpu_freqs = freq_df[freq_df.cpu == cpu].frequency
@@ -135,22 +157,23 @@ class FrequencyAnalysis(AnalysisBase):
         # a cpu_frequency event is triggered that can generate a duplicate)
         cpu_freqs = cpu_freqs.loc[cpu_freqs.shift(-1) != cpu_freqs]
         transitions = cpu_freqs.value_counts()
-        # Convert frequencies to MHz
-        transitions.index = transitions.index / 1000
+        transitions.name = "transitions"
         transitions.sort_index(inplace=True)
+        return pd.DataFrame(transitions)

+    @requires_events(df_cpu_frequency_transitions.required_events)
     def df_cpu_frequency_transition_rate(self, cpu):
         """
         Compute frequency transition rate of a given CPU.
-        Requires cpu_frequency events to be available in the trace.

         :param cpu: a CPU ID
         :type cpu: int

-        :returns: :mod:`pandas.DataFrame - number of frequency transitions per
-            second
+        :returns: A :class:`pandas.DataFrame` with:
+
+          * A ``transitions`` column (the number of frequency transitions per second)
         """
         transitions = self.df_cpu_frequency_transitions(cpu)
         if transitions is None:
@@ -160,6 +183,24 @@ class FrequencyAnalysis(AnalysisBase):
             lambda x: x / (self._trace.x_max - self._trace.x_min)
         )

+    @requires_events(['cpu_frequency'])
+    def get_average_cpu_frequency(self, cpu):
+        """
+        Get the average frequency for a given CPU
+
+        :param cpu: The CPU to analyse
+        :type cpu: int
+        """
+        df = self._trace.df_events('cpu_frequency')
+        df = df[df.cpu == cpu]
+
+        # We can't use the pandas average because it's not weighted by
+        # time spent in each frequency, so we have to craft our own.
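+        # That is: avg = sum(freq_i * delta_i) / (t_last - t_first), where
+        # delta_i is the time spent at freq_i (this is just the weighted
+        # average computed below, spelled out).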
+ df = self._trace.add_events_deltas(df, inplace=False) + timespan = df.index[-1] - df.index[0] + + return (df['frequency'] * df['delta']).sum() / timespan + ############################################################################### # Plotting Methods ############################################################################### @@ -178,7 +219,7 @@ class FrequencyAnalysis(AnalysisBase): """ freq = self._trace.getPeripheralClockEffectiveRate(clk) if freq is None or freq.empty: - self._log.warning('no peripheral clock events found for clock') + self.get_logger().warning('no peripheral clock events found for clock') return fig = plt.figure(figsize=(16,8)) @@ -198,7 +239,7 @@ class FrequencyAnalysis(AnalysisBase): set_rate.plot(style=['b--'], ax=freq_axis, drawstyle='steps-post', alpha=0.4, label="clock_set_rate value") freq_axis.hlines(set_rate.iloc[-1], set_rate.index[-1], self._trace.x_max, linestyle='--', color='b', alpha=0.4) else: - self._log.warning('No clock_set_rate events to plot') + self.get_logger().warning('No clock_set_rate events to plot') # Plot frequency information (effective rate) eff_rate = freq['effective_rate'].dropna() @@ -207,7 +248,7 @@ class FrequencyAnalysis(AnalysisBase): eff_rate.plot(style=['b-'], ax=freq_axis, drawstyle='steps-post', alpha=1.0, label="Effective rate (with on/off)") freq_axis.hlines(eff_rate.iloc[-1], eff_rate.index[-1], self._trace.x_max, linestyle='-', color='b', alpha=1.0) else: - self._log.warning('No effective frequency events to plot') + self.get_logger().warning('No effective frequency events to plot') freq_axis.set_ylim(0, rate_axis_lib * 1.1) freq_axis.set_xlim(self._trace.x_min, self._trace.x_max) @@ -243,757 +284,216 @@ class FrequencyAnalysis(AnalysisBase): figname = os.path.join(self._trace.plots_dir, '{}{}.png'.format(self._trace.plots_prefix, clk)) pl.savefig(figname, bbox_inches='tight') - def plot_cluster_frequencies(self, title='Clusters Frequencies'): - """ - Plot frequency trend for all clusters. If sched_overutilized events are - available, the plots will also show the intervals of time where the - cluster was overutilized. 
- - :param title: user-defined plot title - :type title: str - """ - if not self._trace.hasEvents('cpu_frequency'): - self._log.warning('Events [cpu_frequency] not found, plot DISABLED!') - return - df = self._trace.df_events('cpu_frequency') - - pd.options.mode.chained_assignment = None - - # Extract LITTLE and big clusters frequencies - # and scale them to [MHz] - if self._little_cpus: - lfreq = df[df.cpu == self._little_cpus[-1]] - lfreq['frequency'] = lfreq['frequency']/1e3 - else: - lfreq = [] - if self._big_cpus: - bfreq = df[df.cpu == self._big_cpus[-1]] - bfreq['frequency'] = bfreq['frequency']/1e3 - else: - bfreq = [] - - # Compute AVG frequency for LITTLE cluster - avg_lfreq = 0 - if len(lfreq) > 0: - lfreq['timestamp'] = lfreq.index - lfreq['delta'] = (lfreq['timestamp'] -lfreq['timestamp'].shift()).fillna(0).shift(-1) - lfreq['cfreq'] = (lfreq['frequency'] * lfreq['delta']).fillna(0) - timespan = lfreq.iloc[-1].timestamp - lfreq.iloc[0].timestamp - avg_lfreq = lfreq['cfreq'].sum()/timespan - - # Compute AVG frequency for big cluster - avg_bfreq = 0 - if len(bfreq) > 0: - bfreq['timestamp'] = bfreq.index - bfreq['delta'] = (bfreq['timestamp'] - bfreq['timestamp'].shift()).fillna(0).shift(-1) - bfreq['cfreq'] = (bfreq['frequency'] * bfreq['delta']).fillna(0) - timespan = bfreq.iloc[-1].timestamp - bfreq.iloc[0].timestamp - avg_bfreq = bfreq['cfreq'].sum()/timespan - - pd.options.mode.chained_assignment = 'warn' - - # Setup a dual cluster plot - fig, pltaxes = plt.subplots(2, 1, figsize=(16, 8)) - plt.suptitle(title, y=.97, fontsize=16, horizontalalignment='center') - - # Plot Cluster frequencies - axes = pltaxes[0] - axes.set_title('big Cluster') - if avg_bfreq > 0: - axes.axhline(avg_bfreq, color='r', linestyle='--', linewidth=2) - axes.set_ylim( - (self._trace.plat_info['freqs']['big'][0] - 100000)/1e3, - (self._trace.plat_info['freqs']['big'][-1] + 100000)/1e3 - ) - if len(bfreq) > 0: - bfreq['frequency'].plot(style=['r-'], ax=axes, - drawstyle='steps-post', alpha=0.4) - else: - self._log.warning('NO big CPUs frequency events to plot') - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.set_ylabel('MHz') - axes.grid(True) - axes.set_xticklabels([]) - axes.set_xlabel('') - self._trace.analysis.status.plot_overutilized(axes) - - axes = pltaxes[1] - axes.set_title('LITTLE Cluster') - if avg_lfreq > 0: - axes.axhline(avg_lfreq, color='b', linestyle='--', linewidth=2) - axes.set_ylim( - (self._trace.plat_info['freqs']['little'][0] - 100000)/1e3, - (self._trace.plat_info['freqs']['little'][-1] + 100000)/1e3 - ) - if len(lfreq) > 0: - lfreq['frequency'].plot(style=['b-'], ax=axes, - drawstyle='steps-post', alpha=0.4) - else: - self._log.warning('NO LITTLE CPUs frequency events to plot') - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.set_ylabel('MHz') - axes.grid(True) - self._trace.analysis.status.plot_overutilized(axes) - - # Save generated plots into datadir - figname = '{}/{}cluster_freqs.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix) - pl.savefig(figname, bbox_inches='tight') - - self._log.info('LITTLE cluster average frequency: %.3f GHz', - avg_lfreq/1e3) - self._log.info('big cluster average frequency: %.3f GHz', - avg_bfreq/1e3) - - return (avg_lfreq/1e3, avg_bfreq/1e3) - def plot_cpu_frequencies(self, cpus=None): + @requires_events(['cpu_frequency']) + def plot_cpu_frequencies(self, cpu, filepath=None, axis=None): """ - Plot frequency for the specified CPUs (or all if not specified). 
-        If sched_overutilized events are available, the plots will also show the
-        intervals of time where the system was overutilized.
+        Plot frequency for the specified CPU

-        The generated plots are also saved as PNG images under the folder
-        specified by the `plots_dir` parameter of :class:`Trace`.
+        :param cpu: The CPU for which to plot frequencies
+        :type cpu: int

-        :param cpus: the list of CPUs to plot, if None it generate a plot
-                     for each available CPU
-        :type cpus: int or list(int)
+        :param axis: If specified, the axis to use for plotting
+        :type axis: matplotlib.axes.Axes

-        :return: a dictionary of average frequency for each CPU.
+        If ``sched_overutilized`` events are available, the plots will also
+        show the intervals of time where the system was overutilized.
         """
-        if not self._trace.hasEvents('cpu_frequency'):
-            self._log.warning('Events [cpu_frequency] not found, plot DISABLED!')
-            return
         df = self._trace.df_events('cpu_frequency')
+        df = df[df.cpu == cpu]

-        if cpus is None:
-            # Generate plots only for available CPUs
-            cpus = list(range(df.cpu.max()+1))
-        else:
-            # Generate plots only specified CPUs
-            cpus = listify(cpus)
-
-        chained_assignment = pd.options.mode.chained_assignment
-        pd.options.mode.chained_assignment = None
-
-        freq = {}
-        for cpu_id in listify(cpus):
-            # Extract CPUs' frequencies and scale them to [MHz]
-            _df = df[df.cpu == cpu_id]
-            if _df.empty:
-                self._log.warning('No [cpu_frequency] events for CPU%d, '
-                                  'plot DISABLED!', cpu_id)
-                continue
-            _df['frequency'] = _df.frequency / 1e3
-
-            # Compute AVG frequency for this CPU
-            avg_freq = 0
-            if len(_df) > 1:
-                timespan = _df.index[-1] - _df.index[0]
-                avg_freq = area_under_curve(_df['frequency'], method='rect') / timespan
-
-            # Store DF for plotting
-            freq[cpu_id] = {
-                'df' : _df,
-                'avg' : avg_freq,
-            }
-
-        pd.options.mode.chained_assignment = chained_assignment
-
-        plots_count = len(freq)
-        if not plots_count:
-            return
+        local_fig = not axis

-        # Setup CPUs plots
-        fig, pltaxes = plt.subplots(len(freq), 1, figsize=(16, 4 * plots_count))
-
-        avg_freqs = {}
-        for plot_idx, cpu_id in enumerate(freq):
-
-            # CPU frequencies and average value
-            _df = freq[cpu_id]['df']
-            _avg = freq[cpu_id]['avg']
-
-            # Plot average frequency
-            try:
-                axes = pltaxes[plot_idx]
-            except TypeError:
-                axes = pltaxes
-            axes.set_title('CPU{:2d} Frequency'.format(cpu_id))
-            axes.axhline(_avg, color='r', linestyle='--', linewidth=2)
-
-            # Set plot limit based on CPU min/max frequencies
-            if 'clusters' in self._trace.plat_info:
-                for cluster,cpus in self._trace.plat_info['clusters'].items():
-                    if cpu_id not in cpus:
-                        continue
-                    freqs = self._trace.plat_info['freqs'][cluster]
-                    break
-            else:
-                freqs = df['frequency'].unique()
-
-            axes.set_ylim((min(freqs) - 100000) / 1e3,
-                          (max(freqs) + 100000) / 1e3)
+        if local_fig:
+            fig, axis = self.setup_plot()

-            # Plot CPU frequency transitions
-            _df['frequency'].plot(style=['r-'], ax=axes,
-                                  drawstyle='steps-post', alpha=0.4)
+        frequencies = self._trace.plat_info['freqs'][cpu]

-            # Plot overutilzied regions (if signal available)
-            self._trace.analysis.status.plot_overutilized(axes)
+        avg = self.get_average_cpu_frequency(cpu)
+        self.get_logger().info(
+            "Average frequency for CPU{} : {:.3f} GHz".format(cpu, avg/1e6))

-            # Finalize plot
-            axes.set_xlim(self._trace.x_min, self._trace.x_max)
-            axes.set_ylabel('MHz')
-            axes.grid(True)
-            if plot_idx + 1 < plots_count:
-                axes.set_xticklabels([])
-                axes.set_xlabel('')
+        df['frequency'].plot(
+            ax=axis, drawstyle='steps-post')

-            avg_freqs[cpu_id] = _avg/1e3
-            self._log.info('CPU%02d average frequency: %.3f GHz',
-                           cpu_id, avg_freqs[cpu_id])
+        if avg > 0:
+            axis.axhline(avg, color=self.get_next_color(axis), linestyle='--',
+                         label="average")

-        # Save generated plots into datadir
-        figname = '{}/{}cpus_freqs.png'\
-            .format(self._trace.plots_dir, self._trace.plots_prefix)
-        pl.savefig(figname, bbox_inches='tight')
+        plot_overutilized = self._trace.analysis.status.plot_overutilized
+        if self._trace.hasEvents(plot_overutilized.required_events):
+            plot_overutilized(axis=axis)

-        return avg_freqs
+        axis.set_ylim(frequencies[0] * 0.9, frequencies[-1] * 1.1)
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
+        axis.set_ylabel('Frequency (Hz)')
+        axis.set_xlabel('Time')

-    def plot_cpu_frequency_residency(self, cpus=None, pct=False, active=False):
-        """
-        Plot per-CPU frequency residency. big CPUs are plotted first and then
-        LITTLEs.
+        axis.set_title('Frequency of CPU{}'.format(cpu))
+        axis.grid(True)
+        axis.legend()

-        Requires the following trace events:
-            - cpu_frequency
-            - cpu_idle
+        if local_fig:
+            self.save_plot(fig, filepath)

-        :param cpus: list of CPU IDs. By default plot all CPUs
-        :type cpus: list(int) or int
+        return axis

-        :param pct: plot residencies in percentage
-        :type pct: bool
+    @requires_events(plot_cpu_frequencies.required_events)
+    def plot_domain_frequencies(self, filepath=None):
+        """
+        Plot frequency trend for all frequency domains.

-        :param active: for percentage plot specify whether to plot active or
-               total time. Default is TOTAL time
-        :type active: bool
+        If ``sched_overutilized`` events are available, the plots will also show
+        the intervals of time where the cluster was overutilized.
         """
-        if not self._trace.hasEvents('cpu_frequency'):
-            self._log.warning('Events [cpu_frequency] not found, plot DISABLED!')
-            return
-        if not self._trace.hasEvents('cpu_idle'):
-            self._log.warning('Events [cpu_idle] not found, plot DISABLED!')
-            return
+        domains = self._trace.plat_info['freq-domains']

-        if cpus is None:
-            # Generate plots only for available CPUs
-            cpufreq_data = self._trace.df_events('cpu_frequency')
-            _cpus = list(range(cpufreq_data.cpu.max()+1))
-        else:
-            _cpus = listify(cpus)
+        fig, axes = self.setup_plot(nrows=len(domains), sharex=True)
+        for idx, domain in enumerate(domains):
+            axis = axes[idx] if len(domains) > 1 else axes

-        # Split between big and LITTLE CPUs ordered from higher to lower ID
-        _cpus.reverse()
-        big_cpus = [c for c in _cpus if c in self._big_cpus]
-        little_cpus = [c for c in _cpus if c in self._little_cpus]
-        _cpus = big_cpus + little_cpus
+            self.plot_cpu_frequencies(domain[0], filepath, axis)

-        # Precompute active and total time for each CPU
-        residencies = []
-        xmax = 0.0
-        for cpu in _cpus:
-            res = self._get_frequency_residency(cpu)
-            residencies.append(ResidencyData('CPU{}'.format(cpu), res))
+            axis.set_title('Frequencies of CPUs {}'.format(domain))
+            axis.set_xlim(self._trace.x_min, self._trace.x_max)

-            max_time = res.total.max().values[0]
-            if xmax < max_time:
-                xmax = max_time
+        self.save_plot(fig, filepath)

-        self._plot_frequency_residency(residencies, 'cpu', xmax, pct, active)
+        return axes

-    def plot_cluster_frequency_residency(self, clusters=None,
-                                         pct=False, active=False):
+    @requires_events(df_cpu_frequency_residency.required_events)
+    def plot_cpu_frequency_residency(self, cpu, filepath=None, pct=False, axes=None):
         """
-        Plot the frequency residency in a given cluster, i.e. the amount of
-        time cluster `cluster` spent at frequency `f_i`. By default, both 'big'
-        and 'LITTLE' clusters data are plotted.
+        Plot per-CPU frequency residency.

-        Requires the following trace events:
-            - cpu_frequency
-            - cpu_idle
-
-        :param clusters: name of the clusters to be plotted (all of them by
-                         default)
-        :type clusters: str ot list(str)
+        :param cpu: The CPU to generate the plot for
+        :type cpu: int

-        :param pct: plot residencies in percentage
+        :param pct: Plot residencies in percentage
         :type pct: bool

-        :param active: for percentage plot specify whether to plot active or
-               total time. Default is TOTAL time
-        :type active: bool
+        :param axes: If specified, the axes to use for plotting
+        :type axes: numpy.ndarray(matplotlib.axes.Axes)
         """
-        if not self._trace.hasEvents('cpu_frequency'):
-            self._log.warning('Events [cpu_frequency] not found, plot DISABLED!')
-            return
-        if not self._trace.hasEvents('cpu_idle'):
-            self._log.warning('Events [cpu_idle] not found, plot DISABLED!')
-            return
-        if 'clusters' not in self._trace.plat_info:
-            self._log.warning('No platform cluster info. Plot DISABLED!')
-            return
-        # Assumption: all CPUs in a cluster run at the same frequency, i.e. the
-        # frequency is scaled per-cluster not per-CPU. Hence, we can limit the
-        # cluster frequencies data to a single CPU
-        if not self._trace.freq_coherency:
-            self._log.warning('Cluster frequency is not coherent, plot DISABLED!')
-            return
-
-        # Sanitize clusters
-        if clusters is None:
-            _clusters = list(self._trace.plat_info['clusters'].keys())
-        else:
-            _clusters = listify(clusters)
+        local_fig = axes is None

-        # Precompute active and total time for each cluster
-        residencies = []
-        xmax = 0.0
-        for cluster in _clusters:
-            res = self._get_frequency_residency(
-                self._trace.plat_info['clusters'][cluster.lower()])
-            residencies.append(ResidencyData('{} Cluster'.format(cluster),
-                                             res))
+        if local_fig:
+            fig, axes = self.setup_plot(nrows=2)

-            max_time = res.total.max().values[0]
-            if xmax < max_time:
-                xmax = max_time
+        residency_df = self.df_cpu_frequency_residency(cpu)

-        self._plot_frequency_residency(residencies, 'cluster', xmax, pct, active)
+        total_df = residency_df.total_time
+        active_df = residency_df.active_time

-    def plot_cpu_frequency_transitions(self, cpus=None, pct=False):
-        """
-        Plot frequency transitions count of the specified CPUs (or all if not
-        specified).
-
-        Requires cpu_frequency events to be available in the trace.
- - :param cpus: list of CPU IDs (all CPUs by default) - :type clusters: int or list(int) - - :param pct: plot frequency transitions in percentage - :type pct: bool - """ - if not self._trace.hasEvents('cpu_frequency'): - self._log.warn('Events [cpu_frequency] not found, plot DISABLED!') - return - df = self._trace.df_events('cpu_frequency') - - if cpus is None: - _cpus = list(range(df.cpu.max() + 1)) - else: - _cpus = listify(cpus) + if pct: + total_df = total_df * 100 / total_df.sum() + active_df = active_df * 100 / active_df.sum() - n_plots = len(_cpus) - gs = gridspec.GridSpec(n_plots, 1) - fig = plt.figure() + total_df.plot.barh(ax=axes[0]) + axes[0].set_title("CPU{} total frequency residency".format(cpu)) - # Precompute frequency transitions - transitions = {} - xmax = 0 - for cpu_id in _cpus: - t = self.df_cpu_frequency_transitions(cpu_id) + active_df.plot.barh(ax=axes[1]) + axes[1].set_title("CPU{} active frequency residency".format(cpu)) + for axis in axes: if pct: - tot = t.transitions.sum() - t = t.apply(lambda x: x * 100.0 / tot) + axis.set_xlabel("Time share (%)") + else: + axis.set_xlabel("Time (s)") - transitions[cpu_id] = t - max_cnt = t.transitions.max() - if xmax < max_cnt: xmax = max_cnt + axis.set_ylabel("Frequency (Hz)") + axis.grid(True) - if pct: - yrange = 0.4 * max(6, len(t)) * n_plots - figtype = "_pct" - labeltype = " [%]" - else: - yrange = 3 * n_plots - figtype = "" - labeltype = "" + if local_fig: + self.save_plot(fig, filepath) - for idx, cpu_id in enumerate(_cpus): - t = transitions[cpu_id] + return axes - axes = fig.add_subplot(gs[idx]) - if pct: - t.T.plot.barh(ax=axes, figsize=(16, yrange), - stacked=True, title='CPU{}'.format(cpu_id)) - axes.legend(loc='lower center', ncol=7) - axes.set_xlim(0, 100) - axes.set_yticklabels([]) - else: - t.plot.barh(ax=axes, figsize=(16, yrange), - color='g', legend=False, - title='CPU{}'.format(cpu_id)) - axes.set_xlim(0, xmax*1.05) - axes.grid(True) - axes.set_ylabel('Frequency [MHz]') - - if idx+1 < n_plots: - axes.set_xticklabels([]) - - axes = fig.axes[0] - legend_y = axes.get_ylim()[1] - axes.annotate('OPP Transitions{}'.format(labeltype), - xy=(0, legend_y), xytext=(-50, 25), - textcoords='offset points', fontsize=18) - fig.axes[-1].set_xlabel('Number of transitions{}'.format(labeltype)) - - figname = '{}cpu_freq_transitions{}.png'.format( - self._trace.plots_prefix, figtype) - fig.savefig(os.path.join(self._trace.plots_dir, figname), - bbox_inches='tight') - - def plot_cluster_frequency_transitions(self, clusters=None, pct=False): + @requires_events(plot_cpu_frequency_residency.required_events) + def plot_domain_frequency_residency(self, filepath=None, pct=False): """ - Plot frequency transitions count of the specified clusters (all of them - is not specified). - - Requires cpu_frequency events to be available in the trace. - - Notice that we assume that frequency is - scaled at cluster level, therefore we always consider the first CPU of - a cluster for this computation. + Plot the frequency residency for all frequency domains. 
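+
+        For example, assuming ``trace`` is a parsed :class:`Trace` exposing
+        this module as ``trace.analysis.frequency``::
+
+            axes = trace.analysis.frequency.plot_domain_frequency_residency(pct=True)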
- :param clusters: name of the clusters to be plotted (all of them by - default) - :type clusters: str or list(str) - - :param pct: plot frequency transitions in percentage + :param pct: Plot residencies in percentage :type pct: bool """ - if not self._trace.hasEvents('cpu_frequency'): - self._log.warn('Events [cpu_frequency] not found, plot DISABLED!') - return + domains = self._trace.plat_info['freq-domains'] - if not self._trace.plat_info or 'clusters' not in self._trace.plat_info: - self._log.warn('No platform cluster info, plot DISABLED!') - return + fig, axes = self.setup_plot(nrows=2*len(domains), sharex=True) + for idx, domain in enumerate(domains): + local_axes = axes[2 * idx : 2 * (idx + 1)] - if clusters is None: - _clusters = list(self._trace.plat_info['clusters'].keys()) - else: - _clusters = listify(clusters) + self.plot_cpu_frequency_residency(domain[0], filepath, pct, local_axes) + for axis in local_axes: + title = axis.get_title() + axis.set_title(title.replace("CPU{}".format(domain[0]), "CPUs {}".format(domain))) - n_plots = len(_clusters) - gs = gridspec.GridSpec(n_plots, 1) - fig = plt.figure() - - # Precompute frequency transitions - transitions = {} - xmax = 0 - for c in _clusters: - # We assume frequency is scaled at cluster level and we therefore - # pick information from the first CPU in the cluster. - cpu_id = self._trace.plat_info['clusters'][c.lower()][0] - t = self.df_cpu_frequency_transitions(cpu_id) - - if pct: - tot = t.transitions.sum() - t = t.apply(lambda x: x * 100.0 / tot) + self.save_plot(fig, filepath) - transitions[c] = t - max_cnt = t.transitions.max() - if xmax < max_cnt: xmax = max_cnt + return axes - if pct: - yrange = 0.4 * max(6, len(t)) * n_plots - figtype = "_pct" - labeltype = " [%]" - else: - yrange = 3 * n_plots - figtype = "" - labeltype = "" - - for idx, c in enumerate(_clusters): - t = transitions[c] - - axes = fig.add_subplot(gs[idx]) - if pct: - t.T.plot.barh(ax=axes, figsize=(16, yrange), - stacked=True, title='{} Cluster'.format(c)) - axes.legend(loc='lower center', ncol=7) - axes.set_xlim(0, 100) - axes.set_yticklabels([]) - else: - t.plot.barh(ax=axes, figsize=(16, yrange), - color='g', legend=False, - title='{} Cluster'.format(c)) - axes.set_xlim(0, xmax*1.05) - axes.grid(True) - axes.set_ylabel('Frequency [MHz]') - - if idx+1 < n_plots: - axes.set_xticklabels([]) - - axes = fig.axes[0] - legend_y = axes.get_ylim()[1] - axes.annotate('OPP Transitions{}'.format(labeltype), - xy=(0, legend_y), xytext=(-50, 25), - textcoords='offset points', fontsize=18) - fig.axes[-1].set_xlabel('Number of transitions{}'.format(labeltype)) - - figname = '{}cluster_freq_transitions{}.png'.format( - self._trace.plots_prefix, figtype) - fig.savefig(os.path.join(self._trace.plots_dir, figname), - bbox_inches='tight') - -############################################################################### -# Utility Methods -############################################################################### - - @memoized - def _get_frequency_residency(self, cluster): - """ - Get a DataFrame with per cluster frequency residency, i.e. amount of - time spent at a given frequency in each cluster. 
-
- :param cluster: this can be either a single CPU ID or a list of CPU IDs
- belonging to a cluster
- :type cluster: int or list(int)
-
- :returns: namedtuple(ResidencyTime) - tuple of total and active time
- dataframes
+ @requires_events(df_cpu_frequency_transitions.required_events)
+ def plot_cpu_frequency_transitions(self, cpu, filepath=None, pct=False, axis=None):
 """
- if not self._trace.hasEvents('cpu_frequency'):
- self._log.warning('Events [cpu_frequency] not found, '
- 'frequency residency computation not possible!')
- return None
- if not self._trace.hasEvents('cpu_idle'):
- self._log.warning('Events [cpu_idle] not found, '
- 'frequency residency computation not possible!')
- return None
+ Plot frequency transitions count of the specified CPU
- _cluster = listify(cluster)
-
- freq_df = self._trace.df_events('cpu_frequency')
- # Assumption: all CPUs in a cluster run at the same frequency, i.e. the
- # frequency is scaled per-cluster not per-CPU. Hence, we can limit the
- # cluster frequencies data to a single CPU. This assumption is verified
- # by the Trace module when parsing the trace.
- if len(_cluster) > 1 and not self._trace.freq_coherency:
- self._log.warning('Cluster frequency is NOT coherent,'
- 'cannot compute residency!')
- return None
- cluster_freqs = freq_df[freq_df.cpu == _cluster[0]]
-
- # Compute TOTAL Time
- time_intervals = cluster_freqs.index[1:] - cluster_freqs.index[:-1]
- total_time = pd.DataFrame({
- 'time': time_intervals,
- 'frequency': [f/1000.0 for f in cluster_freqs.iloc[:-1].frequency]
- })
- total_time = total_time.groupby(['frequency']).sum()
-
- # Compute ACTIVE Time
- cluster_active = self._trace.getClusterActiveSignal(_cluster)
-
- # In order to compute the active time spent at each frequency we
- # multiply 2 square waves:
- # - cluster_active, a square wave of the form:
- # cluster_active[t] == 1 if at least one CPU is reported to be
- # non-idle by CPUFreq at time t
- # cluster_active[t] == 0 otherwise
- # - freq_active, square wave of the form:
- # freq_active[t] == 1 if at time t the frequency is f
- # freq_active[t] == 0 otherwise
- available_freqs = sorted(cluster_freqs.frequency.unique())
- cluster_freqs = cluster_freqs.join(
- cluster_active.to_frame(name='active'), how='outer')
- cluster_freqs.fillna(method='ffill', inplace=True)
- nonidle_time = []
- for f in available_freqs:
- freq_active = cluster_freqs.frequency.apply(lambda x: 1 if x == f else 0)
- active_t = cluster_freqs.active * freq_active
- # Compute total time by integrating the square wave
- nonidle_time.append(self._trace.integrate_square_wave(active_t))
-
- active_time = pd.DataFrame({'time': nonidle_time},
- index=[f/1000.0 for f in available_freqs])
- active_time.index.name = 'frequency'
- return ResidencyTime(total_time, active_time)
+ :param cpu: The CPU to generate the plot for
+ :type cpu: int
- def _plot_frequency_residency_abs(self, axes, residency, n_plots,
- is_first, is_last, xmax, title=''):
+ :param pct: Plot frequency transitions in percentage
+ :type pct: bool
 """
- Private method to generate frequency residency plots.
- - :param axes: axes over which to generate the plot - :type axes: matplotlib.axes.Axes + local_fig = axis is None - :param residency: tuple of total and active time dataframes - :type residency: namedtuple(ResidencyTime) + if local_fig: + fig, axis = self.setup_plot() - :param n_plots: total number of plots - :type n_plots: int + df = self.df_cpu_frequency_transitions(cpu) - :param is_first: if True this is the first plot - :type is_first: bool + if pct: + df = df * 100 / df.sum() - :param is_last: if True this is the last plot - :type is_last: bool + df["transitions"].plot.barh(ax=axis) - :param xmax: x-axes higher bound - :param xmax: double + axis.set_title('Frequency transitions of CPU{}'.format(cpu)) - :param title: title of this subplot - :type title: str - """ - yrange = 0.4 * max(6, len(residency.total)) * n_plots - residency.total.plot.barh(ax=axes, color='g', - legend=False, figsize=(16, yrange)) - residency.active.plot.barh(ax=axes, color='r', - legend=False, figsize=(16, yrange)) - - axes.set_xlim(0, 1.05*xmax) - axes.set_ylabel('Frequency [MHz]') - axes.set_title(title) - axes.grid(True) - if is_last: - axes.set_xlabel('Time [s]') + if pct: + axis.set_xlabel("Transitions share (%)") else: - axes.set_xticklabels([]) - - if is_first: - # Put title on top of the figure. As of now there is no clean way - # to make the title appear always in the same position in the - # figure because figure heights may vary between different - # platforms (different number of OPPs). Hence, we use annotation - legend_y = axes.get_ylim()[1] - axes.annotate('OPP Residency Time', xy=(0, legend_y), - xytext=(-50, 45), textcoords='offset points', - fontsize=18) - axes.annotate('GREEN: Total', xy=(0, legend_y), - xytext=(-50, 25), textcoords='offset points', - color='g', fontsize=14) - axes.annotate('RED: Active', xy=(0, legend_y), - xytext=(50, 25), textcoords='offset points', - color='r', fontsize=14) - - def _plot_frequency_residency_pct(self, axes, residency_df, label, - n_plots, is_first, is_last, res_type): - """ - Private method to generate PERCENTAGE frequency residency plots. 
+ axis.set_xlabel("Transition count") - :param axes: axes over which to generate the plot - :type axes: matplotlib.axes.Axes + axis.set_ylabel("Frequency (Hz)") + axis.grid(True) - :param residency_df: residency time dataframe - :type residency_df: :mod:`pandas.DataFrame` + if local_fig: + self.save_plot(fig, filepath) - :param label: label to be used for percentage residency dataframe - :type label: str + return axis - :param n_plots: total number of plots - :type n_plots: int - - :param is_first: if True this is the first plot - :type is_first: bool - - :param is_first: if True this is the last plot - :type is_first: bool - - :param res_type: type of residency, either TOTAL or ACTIVE - :type title: str + @requires_events(plot_cpu_frequency_transitions.required_events) + def plot_domain_frequency_transitions(self, filepath=None, pct=False): """ - # Compute sum of the time intervals - duration = residency_df.time.sum() - residency_pct = pd.DataFrame( - {label: residency_df.time.apply(lambda x: x*100/duration)}, - index=residency_df.index - ) - yrange = 3 * n_plots - residency_pct.T.plot.barh(ax=axes, stacked=True, figsize=(16, yrange)) - - axes.legend(loc='lower center', ncol=7) - axes.set_xlim(0, 100) - axes.grid(True) - if is_last: - axes.set_xlabel('Residency [%]') - else: - axes.set_xticklabels([]) - if is_first: - legend_y = axes.get_ylim()[1] - axes.annotate('OPP {} Residency Time'.format(res_type), - xy=(0, legend_y), xytext=(-50, 35), - textcoords='offset points', fontsize=18) - - def _plot_frequency_residency(self, residencies, entity_name, xmax, - pct, active): + Plot frequency transitions count for all frequency domains + + :param pct: Plot frequency transitions in percentage + :type pct: bool """ - Generate Frequency residency plots for the given entities. + domains = self._trace.plat_info['freq-domains'] - :param residencies: list of residencies to be plotted - :type residencies: list(namedtuple(ResidencyData)) - each tuple - contains: - - a label to be used as subplot title - - a namedtuple(ResidencyTime) + fig, axes = self.setup_plot(nrows=len(domains)) - :param entity_name: name of the entity ('cpu' or 'cluster') used in the - figure name - :type entity_name: str + for idx, domain in enumerate(domains): + axis = axes[idx] - :param xmax: upper bound of x-axes - :type xmax: double + self.plot_cpu_frequency_transitions(domain[0], filepath, pct, axis) - :param pct: plot residencies in percentage - :type pct: bool + title = axis.get_title() + axis.set_title(title.replace("CPU{}".format(domain[0]), "CPUs {}".format(domain))) - :param active: for percentage plot specify whether to plot active or - total time. 
Default is TOTAL time - :type active: bool - """ - n_plots = len(residencies) - gs = gridspec.GridSpec(n_plots, 1) - fig = plt.figure() - - figtype = "" - for idx, data in enumerate(residencies): - if data.residency is None: - plt.close(fig) - return - - axes = fig.add_subplot(gs[idx]) - is_first = idx == 0 - is_last = idx+1 == n_plots - if pct and active: - self._plot_frequency_residency_pct(axes, data.residency.active, - data.label, n_plots, - is_first, is_last, - 'ACTIVE') - figtype = "_pct_active" - continue - if pct: - self._plot_frequency_residency_pct(axes, data.residency.total, - data.label, n_plots, - is_first, is_last, - 'TOTAL') - figtype = "_pct_total" - continue - - self._plot_frequency_residency_abs(axes, data.residency, - n_plots, is_first, - is_last, xmax, - title=data.label) - - figname = '{}/{}{}_freq_residency{}.png'\ - .format(self._trace.plots_dir, - self._trace.plots_prefix, - entity_name, figtype) - pl.savefig(figname, bbox_inches='tight') + self.save_plot(fig, filepath) + + return axes # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/lisa/analysis/functions.py b/lisa/analysis/functions.py index 5169416d99212d1e60f718a34e588ab630146514..64ecb3e9ad191a527c446099f9a1aed7c9cbfcb2 100644 --- a/lisa/analysis/functions.py +++ b/lisa/analysis/functions.py @@ -52,7 +52,7 @@ class FunctionsAnalysis(AnalysisBase): :type metrics: srt or list(str) """ if not hasattr(self._trace, '_functions_stats_df'): - self._log.warning('Functions stats data not available') + self.get_logger().warning('Functions stats data not available') return metrics = listify(metrics) diff --git a/lisa/analysis/idle.py b/lisa/analysis/idle.py index f22719c1a4f47296c9e5354e1c937430f8cdd98a..e467ccd5c74af9c0cf77dbe1c26ce52331c043f1 100644 --- a/lisa/analysis/idle.py +++ b/lisa/analysis/idle.py @@ -15,15 +15,15 @@ # limitations under the License. # -""" Idle Analysis Module """ +from functools import reduce +import operator -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt import pandas as pd -import pylab as pl -from lisa.analysis.base import AnalysisBase, ResidencyTime, ResidencyData -from trappy.utils import listify +from trappy.utils import handle_duplicate_index + +from lisa.utils import memoized +from lisa.analysis.base import AnalysisBase, requires_events class IdleAnalysis(AnalysisBase): @@ -43,24 +43,112 @@ class IdleAnalysis(AnalysisBase): # DataFrame Getter Methods ############################################################################### + @memoized + @requires_events(['cpu_idle']) + def signal_cpu_active(self, cpu): + """ + Build a square wave representing the active (i.e. 
non-idle) CPU time
+
+ :param cpu: CPU ID
+ :type cpu: int
+
+ :returns: A :class:`pandas.Series` that equals 1 at timestamps where the
+ CPU is reported to be non-idle, 0 otherwise
+ """
+ idle_df = self._trace.df_events('cpu_idle')
+ cpu_df = idle_df[idle_df.cpu_id == cpu]
+
+ cpu_active = cpu_df.state.apply(
+ lambda s: 1 if s == -1 else 0
+ )
+
+ start_time = 0.0
+ if not self._trace.ftrace.normalized_time:
+ start_time = self._trace.ftrace.basetime
+
+ if cpu_active.empty:
+ cpu_active = pd.Series([0], index=[start_time])
+ elif cpu_active.index[0] != start_time:
+ entry_0 = pd.Series(cpu_active.iloc[0] ^ 1, index=[start_time])
+ cpu_active = pd.concat([entry_0, cpu_active])
+
+ # Fix sequences of wakeup/sleep events reported with the same index
+ return handle_duplicate_index(cpu_active)
+
+ @requires_events(signal_cpu_active.required_events)
+ def signal_cluster_active(self, cluster):
+ """
+ Build a square wave representing the active (i.e. non-idle) cluster time
+
+ :param cluster: list of CPU IDs belonging to a cluster
+ :type cluster: list(int)
+
+ :returns: A :class:`pandas.Series` that equals 1 at timestamps where at
+ least one CPU is reported to be non-idle, 0 otherwise
+ """
+ active = self.signal_cpu_active(cluster[0]).to_frame(name=cluster[0])
+ for cpu in cluster[1:]:
+ active = active.join(
+ self.signal_cpu_active(cpu).to_frame(name=cpu),
+ how='outer'
+ )
+
+ active.fillna(method='ffill', inplace=True)
+ # There might be NaNs in the signal where we got data from some CPUs
+ # before others. That will break the .astype(int) below, so drop rows
+ # with NaN in them.
+ active.dropna(inplace=True)
+
+ # Cluster active is the OR between the actives on each CPU
+ # belonging to that specific cluster
+ cluster_active = reduce(
+ operator.or_,
+ [cpu_active.astype(int) for _, cpu_active in
+ active.items()]
+ )
+
+ return cluster_active
+
+ @requires_events(['cpu_idle'])
+ def df_cpus_wakeups(self):
+ """
+ Get a DataFrame showing when CPUs have woken from idle
+
+ :returns: A :class:`pandas.DataFrame` with
+
+ * A ``cpu`` column (the CPU that woke up at the row index)
+ """
+ cpus = list(range(self._trace.cpus_count))
+
+ sr = pd.Series()
+ for cpu in cpus:
+ cpu_sr = self.signal_cpu_active(cpu)
+ cpu_sr = cpu_sr[cpu_sr == 1]
+ cpu_sr = cpu_sr.replace(1, cpu)
+ sr = sr.append(cpu_sr)
+
+ return pd.DataFrame({'cpu': sr}).sort_index()
+
+ @requires_events(["cpu_idle"])
+ def df_cpu_idle_state_residency(self, cpu):
+ """
+ Compute time spent by a given CPU in each idle state.
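+
+ For example (a sketch under the assumption that ``trace`` is a parsed
+ ``Trace`` exposing this module as ``trace.analysis.idle``)::
+
+ res_df = trace.analysis.idle.df_cpu_idle_state_residency(0)
+ deepest = res_df.time.idxmax() # idle state CPU0 sat in the longest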
- :param entity: CPU ID
- :type entity: int
+ :param cpu: CPU ID
+ :type cpu: int
- :returns: :mod:`pandas.DataFrame` - idle state residency dataframe
- """
- if not self._trace.hasEvents('cpu_idle'):
- self._log.warning('Events [cpu_idle] not found, '
- 'idle state residency computation not possible!')
- return None
+ :returns: a :class:`pandas.DataFrame` with:
+
+ * Idle states as index
+ * A ``time`` column (The time spent in the idle state)
+ """
 idle_df = self._trace.df_events('cpu_idle')
 cpu_idle = idle_df[idle_df.cpu_id == cpu]
- cpu_is_idle = self._trace.getCPUActiveSignal(cpu) ^ 1
+ cpu_is_idle = self.signal_cpu_active(cpu) ^ 1
 # In order to compute the time spent in each idle state we
@@ -93,35 +181,26 @@ class IdleAnalysis(AnalysisBase):
 idle_time_df.index.name = 'idle_state'
 return idle_time_df
+ @requires_events(['cpu_idle'])
 def df_cluster_idle_state_residency(self, cluster):
 """
 Compute time spent by a given cluster in each idle state.
- :param cluster: cluster name or list of CPU IDs
- :type cluster: str or list(int)
+ :param cluster: list of CPU IDs
+ :type cluster: list(int)
- :returns: :mod:`pandas.DataFrame` - idle state residency dataframe
- """
- if not self._trace.hasEvents('cpu_idle'):
- self._log.warning('Events [cpu_idle] not found, '
- 'idle state residency computation not possible!')
- return None
-
- _cluster = cluster
- if isinstance(cluster, str) or isinstance(cluster, str):
- try:
- _cluster = self._trace.plat_info['clusters'][cluster.lower()]
- except KeyError:
- self._log.warning('%s cluster not found!', cluster)
- return None
+ :returns: a :class:`pandas.DataFrame` with:
+
+ * Idle states as index
+ * A ``time`` column (The time spent in the idle state)
+ """
 idle_df = self._trace.df_events('cpu_idle')
 # Each core in a cluster can be in a different idle state, but the
 # cluster lies in the idle state with lowest ID, that is the shallowest
 # idle state among the idle states of its CPUs
- cl_idle = idle_df[idle_df.cpu_id == _cluster[0]].state.to_frame(
- name=_cluster[0])
- for cpu in _cluster[1:]:
+ cl_idle = idle_df[idle_df.cpu_id == cluster[0]].state.to_frame(
+ name=cluster[0])
+ for cpu in cluster[1:]:
 cl_idle = cl_idle.join(
 idle_df[idle_df.cpu_id == cpu].state.to_frame(name=cpu),
 how='outer'
@@ -133,9 +212,9 @@ class IdleAnalysis(AnalysisBase):
 # cl_is_idle[t] == 1 if all CPUs in the cluster are reported
 # to be idle by cpufreq at time t
 # cl_is_idle[t] == 0 otherwise
- cl_is_idle = self._trace.getClusterActiveSignal(_cluster) ^ 1
+ cl_is_idle = self.signal_cluster_active(cluster) ^ 1
- # In order to compute the time spent in each idle statefrequency we
+ # In order to compute the time spent in each idle state we
 # multiply 2 square waves:
 # - cluster_is_idle
 # - idle_state, square wave of the form:
@@ -165,161 +244,103 @@ class IdleAnalysis(AnalysisBase):
 # Plotting Methods
###############################################################################
- def plot_cpu_idle_state_residency(self, cpus=None, pct=False):
+ @requires_events(df_cpu_idle_state_residency.required_events)
+ def plot_cpu_idle_state_residency(self, cpu, filepath=None, pct=False):
+ """
+ Plot the idle state residency of a CPU
+
+ :param cpu: The CPU
+ :type cpu: int
+
+ :param pct: Plot residencies in percentage
+ :type pct: bool
 """
- Plot per-CPU idle state residency. big CPUs are plotted first and then
- LITTLEs.
+ fig, axis = self.setup_plot()
- Requires cpu_idle trace events.
+ df = self.df_cpu_idle_state_residency(cpu)
- :param cpus: list of CPU IDs. By default plot all CPUs
- :type cpus: list(int) or int
+ self._plot_idle_state_residency(df, axis, pct)
- :param pct: plot residencies in percentage
+ axis.set_title("CPU{} idle state residency".format(cpu))
+
+ self.save_plot(fig, filepath)
+
+ return axis
+
+ @requires_events(df_cluster_idle_state_residency.required_events)
+ def plot_cluster_idle_state_residency(self, cluster, filepath=None,
+ pct=False, axis=None):
+ """
+ Plot the idle state residency of a cluster
+
+ :param cluster: The cluster
+ :type cluster: list(int)
+
+ :param pct: Plot residencies in percentage
 :type pct: bool
+
+ :param axis: If specified, the axis to use for plotting
+ :type axis: matplotlib.axes.Axes
 """
- if not self._trace.hasEvents('cpu_idle'):
- self._log.warning('Events [cpu_idle] not found, '
- 'plot DISABLED!')
- return
-
- if cpus is None:
- # Generate plots only for available CPUs
- cpuidle_data = self._trace.df_events('cpu_idle')
- _cpus = list(range(cpuidle_data.cpu_id.max() + 1))
- else:
- _cpus = listify(cpus)
+ local_fig = axis is None
+
+ if local_fig:
+ fig, axis = self.setup_plot()
- # Split between big and LITTLE CPUs ordered from higher to lower ID
- _cpus.reverse()
- big_cpus = [c for c in _cpus if c in self._big_cpus]
- little_cpus = [c for c in _cpus if c in self._little_cpus]
- _cpus = big_cpus + little_cpus
+ df = self.df_cluster_idle_state_residency(cluster)
- residencies = []
- xmax = 0.0
- for cpu in _cpus:
- r = self.df_cpu_idle_state_residency(cpu)
- residencies.append(ResidencyData('CPU{}'.format(cpu), r))
+ self._plot_idle_state_residency(df, axis, pct)
- max_time = r.max().values[0]
- if xmax < max_time:
- xmax = max_time
+ axis.set_title("CPUs {} idle state residency".format(cluster))
- self._plot_idle_state_residency(residencies, 'cpu', xmax, pct=pct)
+ if local_fig:
+ self.save_plot(fig, filepath)
- def plot_cluster_idle_state_residency(self, clusters=None, pct=False):
+ return axis
+
+ @requires_events(plot_cluster_idle_state_residency.required_events)
+ def plot_clusters_idle_state_residency(self, filepath=None, pct=False):
 """
- Plot per-cluster idle state residency in a given cluster, i.e. the
- amount of time cluster `cluster` spent in idle state `i`. By default,
- both 'big' and 'LITTLE' clusters data are plotted.
-
- Requires cpu_idle following trace events.
- :param clusters: name of the clusters to be plotted (all of them by
- default)
- :type clusters: str ot list(str)
+ Plot the idle state residency of all clusters
+
+ :param pct: Plot residencies in percentage
+ :type pct: bool
+
+ .. note:: This assumes clusters == frequency domains, which may
+ not hold true...
 """
- if not self._trace.hasEvents('cpu_idle'):
- self._log.warning('Events [cpu_idle] not found, plot DISABLED!')
- return
- if 'clusters' not in self._trace.plat_info:
- self._log.warning('No platform cluster info.
Plot DISABLED!') - return - - # Sanitize clusters - if clusters is None: - _clusters = list(self._trace.plat_info['clusters'].keys()) - else: - _clusters = listify(clusters) + clusters = self._trace.plat_info['freq-domains'] - # Precompute residencies for each cluster - residencies = [] - xmax = 0.0 - for c in _clusters: - r = self.df_cluster_idle_state_residency(c.lower()) - residencies.append(ResidencyData('{} Cluster'.format(c), r)) + fig, axes = self.setup_plot(nrows=len(clusters), sharex=True) - max_time = r.max().values[0] - if xmax < max_time: - xmax = max_time + for idx, cluster in enumerate(clusters): + axis = axes[idx] - self._plot_idle_state_residency(residencies, 'cluster', xmax, pct=pct) + self.plot_cluster_idle_state_residency(cluster, pct=pct, axis=axis) + + self.save_plot(fig, filepath) + + return axes ############################################################################### # Utility Methods ############################################################################### - def _plot_idle_state_residency(self, residencies, entity_name, xmax, - pct=False): + def _plot_idle_state_residency(self, df, axis, pct): """ - Generate Idle state residency plots for the given entities. - - :param residencies: list of residencies to be plot - :type residencies: list(namedtuple(ResidencyData)) - each tuple - contains: - - a label to be used as subplot title - - a dataframe with residency for each idle state + A convenient helper to plot idle state residency + """ + if pct: + df = df * 100 / df.sum() - :param entity_name: name of the entity ('cpu' or 'cluster') used in the - figure name - :type entity_name: str + df["time"].plot.barh(ax=axis) - :param xmax: upper bound of x-axes - :type xmax: double + if pct: + axis.set_xlabel("Time share (%)") + else: + axis.set_xlabel("Time (s)") - :param pct: plot residencies in percentage - :type pct: bool - """ - n_plots = len(residencies) - gs = gridspec.GridSpec(n_plots, 1) - fig = plt.figure() - - for idx, data in enumerate(residencies): - r = data.residency - if r is None: - plt.close(fig) - return - - axes = fig.add_subplot(gs[idx]) - is_first = idx == 0 - is_last = idx+1 == n_plots - yrange = 0.4 * max(6, len(r)) * n_plots - if pct: - duration = r.time.sum() - r_pct = r.apply(lambda x: x*100/duration) - r_pct.columns = [data.label] - r_pct.T.plot.barh(ax=axes, stacked=True, figsize=(16, yrange)) - - axes.legend(loc='lower center', ncol=7) - axes.set_xlim(0, 100) - else: - r.plot.barh(ax=axes, color='g', - legend=False, figsize=(16, yrange)) - - axes.set_xlim(0, 1.05*xmax) - axes.set_ylabel('Idle State') - axes.set_title(data.label) - - axes.grid(True) - if is_last: - if pct: - axes.set_xlabel('Residency [%]') - else: - axes.set_xlabel('Time [s]') - else: - axes.set_xticklabels([]) - - if is_first: - legend_y = axes.get_ylim()[1] - axes.annotate('Idle State Residency Time', xy=(0, legend_y), - xytext=(-50, 45), textcoords='offset points', - fontsize=18) - - figname = '{}/{}{}_idle_state_residency.png'\ - .format(self._trace.plots_dir, - self._trace.plots_prefix, - entity_name) - - pl.savefig(figname, bbox_inches='tight') + axis.set_ylabel("Idle state") + axis.grid(True) # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py index 29aef7b283a202662dfb691e38cfe904fa55ba20..70f6252f50d3c70b38d50984036356e6cf1a23e7 100644 --- a/lisa/analysis/latency.py +++ b/lisa/analysis/latency.py @@ -15,26 +15,12 @@ # limitations under the License. 
# -""" Latency Analysis Module """ - -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt -import numpy as np import pandas as pd -import pylab as pl -import re -import os +import numpy as np -from collections import namedtuple -from lisa.analysis.base import AnalysisBase +from lisa.analysis.base import AnalysisBase, requires_events, COLOR_CYCLES +from lisa.analysis.tasks import TaskState, TasksAnalysis from lisa.utils import memoized -from trappy.utils import listify -from devlib.target import KernelVersion - -# Tuple representing all IDs data of a Task -TaskData = namedtuple('TaskData', ['pid', 'names', 'label']) - -CDF = namedtuple('CDF', ['df', 'threshold', 'above', 'below']) class LatencyAnalysis(AnalysisBase): """ @@ -46,6 +32,9 @@ class LatencyAnalysis(AnalysisBase): name = 'latency' + LATENCY_THRESHOLD_ZONE_COLOR=COLOR_CYCLES[2] + LATENCY_THRESHOLD_COLOR=COLOR_CYCLES[3] + def __init__(self, trace): super(LatencyAnalysis, self).__init__(trace) @@ -53,917 +42,378 @@ class LatencyAnalysis(AnalysisBase): # DataFrame Getter Methods ############################################################################### - @memoized - def df_latency(self, task): + @requires_events(TasksAnalysis.df_task_states.required_events) + def df_latency_wakeup(self, task): """ - DataFrame of task's wakeup/suspend events - - The returned DataFrame index is the time, in seconds, an event related - to `task` happened. - The DataFrame has these columns: - - target_cpu: the CPU where the task has been scheduled - reported only for wakeup events - - curr_state: the current task state: - A letter which corresponds to the standard events reported by the - prev_state field of a sched_switch event. - Only exception is 'A', which is used to represent active tasks, - i.e. tasks RUNNING on a CPU - - next_state: the next status for the task - - t_start: the time when the current status started, it matches Time - - t_delta: the interval of time after witch the task will switch to the - next_state - - :param task: the task to report wakeup latencies for + DataFrame of a task's wakeup latencies + + :param task: The task's name or PID :type task: int or str - """ - if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Events [sched_wakeup] not found, ' - 'cannot compute CPU active signal!') - return None - if not self._trace.hasEvents('sched_switch'): - self._log.warning('Events [sched_switch] not found, ' - 'cannot compute CPU active signal!') - return None - - # Get task data - td = self._get_task_data(task) - if not td: - return None - - wk_df = self._trace.df_events('sched_wakeup') - sw_df = self._trace.df_events('sched_switch') - - # Filter Task's WAKEUP events - task_wakeup = wk_df[wk_df.pid == td.pid][['target_cpu', 'pid']] - - # Filter Task's START events - task_events = (sw_df.prev_pid == td.pid) | (sw_df.next_pid == td.pid) - task_switches_df = sw_df[task_events]\ - [['__cpu', 'prev_pid', 'next_pid', 'prev_state']] - - # Unset prev_state for switch_in events, i.e. - # we don't care about the status of a task we are replacing - task_switches_df.prev_state = task_switches_df.apply( - lambda r : np.nan if r['prev_pid'] != td.pid - else self._task_state(r['prev_state']), - axis=1) - - # Rename prev_state - task_switches_df.rename(columns={'prev_state' : 'curr_state'}, inplace=True) - - # Fill in Running status - # We've just set curr_state (a.k.a prev_state) to nan where td.pid was - # switching in, so set the state to 'A' ("active") in those places. 
- task_switches_df.curr_state = task_switches_df.curr_state.fillna(value='A') - - # Join Wakeup and SchedSwitch events - task_latency_df = task_wakeup.join(task_switches_df, how='outer', - lsuffix='_wkp', rsuffix='_slp') - # Remove not required columns - task_latency_df = task_latency_df[['target_cpu', '__cpu', 'curr_state']] - # Set Wakeup state on each Wakeup event - task_latency_df.curr_state = task_latency_df.curr_state.fillna(value='W') - - # Sanity check for all task states to be mapped to a char - numbers = 0 - for value in task_switches_df.curr_state.unique(): - if type(value) is not str: - self._log.warning('The [sched_switch] events contain "prev_state" value [%s]', - value) - numbers += 1 - if numbers: - verb = 'is' if numbers == 1 else 'are' - self._log.warning(' which %s not currently mapped into a task state.', - verb) - self._log.warning('Check mappings in:') - self._log.warning(' %s::%s _task_state()', - __file__, self.__class__.__name__) - - # Forward annotate task state - task_latency_df['next_state'] = task_latency_df.curr_state.shift(-1) - - # Forward account for previous state duration - task_latency_df['t_start'] = task_latency_df.index - task_latency_df['t_delta'] = ( - task_latency_df['t_start'].shift(-1) - - task_latency_df['t_start'] - ) - - # Fix the last entry, which will have a NaN state duration - # Set duration to trace_end - last_event - task_latency_df.loc[task_latency_df.index[-1], 't_delta'] = ( - self._trace.start_time + - self._trace.time_range - - task_latency_df.index[-1] - ) - - return task_latency_df - - - # Select Wakeup latency - def df_latency_wakeup(self, task): + :returns: a :class:`pandas.DataFrame` with: + + * A ``wakeup_latency`` column (the wakeup latency at that timestamp). """ - DataFrame of task's wakeup latencies - The returned DataFrame index is the time, in seconds, `task` waken-up. - The DataFrame has just one column: - - wakeup_latency: the time the task waited before getting a CPU + df = self._trace.analysis.tasks.df_task_states(task) - :param task: the task to report wakeup latencies for - :type task: int or str - """ + df = df[(df.curr_state == TaskState.TASK_WAKING.char) & + (df.next_state == TaskState.TASK_ACTIVE.char)][["delta"]] - task_latency_df = self.df_latency(task) - if task_latency_df is None: - return None - df = task_latency_df[ - (task_latency_df.curr_state == 'W') & - (task_latency_df.next_state == 'A')][['t_delta']] - df.rename(columns={'t_delta' : 'wakeup_latency'}, inplace=True) + df.rename(columns={'delta' : 'wakeup_latency'}, inplace=True) return df - # Select Wakeup latency + @requires_events(TasksAnalysis.df_task_states.required_events) def df_latency_preemption(self, task): """ - DataFrame of task's preemption latencies + DataFrame of a task's preemption latencies - The returned DataFrame index is the time, in seconds, `task` has been - preempted. - The DataFrame has just one column: - - preemption_latency: the time the task waited before getting again a CPU - - :param task: the task to report wakeup latencies for + :param task: The task's name or PID :type task: int or str + + :returns: a :class:`pandas.DataFrame` with: + + * A ``preempt_latency`` column (the preemption latency at that timestamp). 
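+
+ A usage sketch (illustrative, not part of the API contract; it assumes
+ a parsed ``Trace`` exposing this module as ``trace.analysis.latency``)::
+
+ prt_df = trace.analysis.latency.df_latency_preemption("mytask")
+ p99 = prt_df.preempt_latency.quantile(0.99)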
""" - task_latency_df = self.df_latency(task) - if task_latency_df is None: - return None - df = task_latency_df[ - (task_latency_df.curr_state.isin([0, 'R', 'R+'])) & - (task_latency_df.next_state == 'A')][['t_delta']] - df.rename(columns={'t_delta' : 'preempt_latency'}, inplace=True) + df = self._trace.analysis.tasks.df_task_states(task) + + df = df[(df.curr_state.str.contains(TaskState.TASK_RUNNING.char)) & + (df.next_state == TaskState.TASK_ACTIVE.char)][["delta"]] + + df.rename(columns={'delta' : 'preempt_latency'}, inplace=True) return df - @memoized + @requires_events(TasksAnalysis.df_task_states.required_events) def df_activations(self, task): """ - DataFrame of task's wakeup intrvals - - The returned DataFrame index is the time, in seconds, `task` has - waken-up. - The DataFrame has just one column: - - activation_interval: the time since the previous wakeup events + DataFrame of a task's activations - :param task: the task to report runtimes for + :param task: The task's name or PID :type task: int or str + + :returns: a :class:`pandas.DataFrame` with: + + * An ``activation_interval`` column (the time since the last activation). """ - # Select all wakeup events - wkp_df = self.df_latency(task) - wkp_df = wkp_df[wkp_df.curr_state == 'W'].copy() - # Compute delta between successive wakeup events - wkp_df['activation_interval'] = ( - wkp_df['t_start'].shift(-1) - wkp_df['t_start']) - wkp_df['activation_interval'] = wkp_df['activation_interval'].shift(1) - # Return the activation period each time the task wakeups - wkp_df = wkp_df[['activation_interval']].shift(-1) - return wkp_df - - @memoized + wkp_df = self._trace.analysis.tasks.df_task_states(task) + wkp_df = wkp_df[wkp_df.curr_state == TaskState.TASK_WAKING.char] + + index = wkp_df.index.to_frame() + wkp_df['activation_interval'] = (index.shift(-1) - index).shift(1) + + return wkp_df[["activation_interval"]] + + @requires_events(TasksAnalysis.df_task_states.required_events) def df_runtimes(self, task): """ DataFrame of task's runtime each time the task blocks - The returned DataFrame index is the time, in seconds, `task` completed - an activation (i.e. sleep or exit) - The DataFrame has just one column: - - running_time: the time the task spent RUNNING since its last wakeup - - :param task: the task to report runtimes for + :param task: The task's name or PID :type task: int or str - """ - # Select all wakeup events - run_df = self.df_latency(task) - - # Filter function to add up RUNNING intervals of each activation - def cr(row): - if row['curr_state'] in ['S']: - return cr.runtime - if row['curr_state'] in ['W']: - if cr.spurious_wkp: - cr.runtime += row['t_delta'] - cr.spurious_wkp = False - return cr.runtime - cr.runtime = 0 - return cr.runtime - if row['curr_state'] != 'A': - return cr.runtime - if row['next_state'] in ['R', 'R+', 'S', 'x', 'D']: - cr.runtime += row['t_delta'] - return cr.runtime - # This is required to capture strange trace sequences where - # a switch_in event is follower by a wakeup_event. - # This sequence is not expected, but we found it in some traces. - # Possible reasons could be: - # - misplaced sched_wakeup events - # - trace buffer artifacts - # TO BE BETTER investigated in kernel space. - # For the time being, we account this interval as RUNNING time, - # which is what kernelshark does. 
- if row['next_state'] in ['W']:
- cr.runtime += row['t_delta']
- cr.spurious_wkp = True
- return cr.runtime
- if row['next_state'] in ['n']:
- return cr.runtime
- self._log.warning("Unexpected next state: %s @ %f",
- row['next_state'], row['t_start'])
- return 0
- # cr's static variables intialization
- cr.runtime = 0
- cr.spurious_wkp = False
-
- # Add up RUNNING intervals of each activation
- run_df['running_time'] = run_df.apply(cr, axis=1)
- # Return RUNTIME computed for each activation,
- # each time the task blocks or terminate
- run_df = run_df[run_df.next_state.isin(['S', 'x'])][['running_time']]
- return run_df
-
- @memoized
- def df_task_residency(self, task):
- """
- DataFrame of a task's execution time on each CPU
- The returned DataFrame index is the CPU indexes
- The DataFrame has just one column:
- - runtime: the time the task spent being active on a given CPU,
- in seconds.
+ :returns: a :class:`pandas.DataFrame` with:
- :param task: the task to report runtimes for
- :type task: int or str
+
+ * The times where the task stopped running as an index
+ * A ``curr_state`` column (the current task state, see
+ :class:`lisa.analysis.tasks.TaskState`)
+ * A ``running_time`` column (the cumulated running time since the
+ last activation).
 """
- cpus = list(range(self._trace.plat_info['cpus-count']))
- runtimes = {cpu : 0.0 for cpu in cpus}
+ df = self._trace.analysis.tasks.df_task_states(task)
+
+ runtimes = []
+ spurious_wkp = False
+
+ # Using df.apply() is risky for counting (can be called more than once
+ # on the same row), so use a loop instead
+ for index, row in df.iterrows():
+ runtime = runtimes[-1] if len(runtimes) else 0
+
+ if row.curr_state == TaskState.TASK_WAKING.char:
+ # This is required to capture strange trace sequences where
+ # a switch_in event is followed by a wakeup_event.
+ # This sequence is not expected, but we found it in some traces.
+ # Possible reasons could be:
+ # - misplaced sched_wakeup events
+ # - trace buffer artifacts
+ # TO BE BETTER investigated in kernel space.
+ # For the time being, we account this interval as RUNNING time,
+ # which is what kernelshark does.
+ if spurious_wkp:
+ runtime += row.delta
+ spurious_wkp = False
+ else:
+ # This is a new activation, reset the runtime counter
+ runtime = 0
- df = self.df_latency(task)
+ elif row.curr_state == TaskState.TASK_ACTIVE.char:
+ # This is the spurious wakeup thing mentioned above
+ if row.next_state == TaskState.TASK_WAKING.char:
+ spurious_wkp = True
- # Exclude sleep time
- df = df[df.curr_state != 'S']
+ runtime += row.delta
- for time, data in df.iterrows():
- cpu = data['__cpu']
+ runtimes.append(runtime)
- # When waking up, '__cpu' is NaN but 'target_cpu' is populated instead
- if np.isnan(cpu):
- if data['curr_state'] == 'W':
- cpu = data['target_cpu']
- else:
- raise RuntimeError('No CPU data for latency_df @{}'.format(time))
+ df["running_time"] = runtimes
- runtimes[cpu] += data['t_delta']
+ # The runtime column is not entirely correct - at a task's first
+ # TASK_ACTIVE occurrence, the running_time will be non-zero, even
+ # though the task has not run yet. However, it's much simpler to
+ # accumulate the running_time the way we do and shift it later.
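+ # Worked micro-example (made-up numbers): if the accumulated column is
+ # [0.1, 0.3, 0.0], shift(1) turns it into [NaN, 0.1, 0.3], and the
+ # fillna(0) below maps the leading NaN to 0, so each row reports the
+ # runtime accumulated *before* entering its state.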
+ df.running_time = df.running_time.shift(1) + df.running_time = df.running_time.fillna(0) - data = [(cpu, time) for cpu, time in runtimes.items()] - return pd.DataFrame(data, columns=['CPU', 'runtime']).set_index('CPU') + return df[~df.curr_state.isin([ + TaskState.TASK_ACTIVE.char, + TaskState.TASK_WAKING.char + ])][["curr_state", "running_time"]] - @memoized - def _get_latency_df(self, task, kind='all', threshold_ms=1): +############################################################################### +# Plotting Methods +############################################################################### + + @requires_events(df_latency_wakeup.required_events) + def plot_latencies(self, task, wakeup=True, preempt=True, threshold_ms=1, + filepath=None): """ - Compute statistics on latencies of the specified task. + Plot the latencies of a task over time + + :param task: The task's name or PID + :type task: int or str - :param task: the task to report latencies for - :type task: int or list(str) + :param wakeup: Whether to plot wakeup latencies + :type wakeup: bool - :param kind: the kind of latencies to report (WAKEUP and/or PREEMPT") - :type kind: str + :param preempt: Whether to plot preemption latencies + :type preempt: bool - :param threshold_ms: the minimum acceptable [ms] value to report - graphically in the generated plots + :param threshold_ms: The latency threshold to plot :type threshold_ms: int or float + """ + fig, axis = self.setup_plot() + + axis.axhline(threshold_ms / 1e3, linestyle='--', color=self.LATENCY_THRESHOLD_COLOR, + label="{}ms threshold".format(threshold_ms)) + + if wakeup: + df = self.df_latency_wakeup(task) + if df.empty: + self.get_logger().warning("No data to plot for wakeups") + else: + df.plot(ax=axis, style='+', label="Wakeup") - :returns: a DataFrame with statistics on task latencies + if preempt: + df = self.df_latency_preemption(task) + if df.empty: + self.get_logger().warning("No data to plot for preemption") + else: + df.plot(ax=axis, style='+', label="Preemption") + + + axis.set_title("Latencies of task \"{}\"".format(task)) + axis.set_ylabel("Latency (s)") + axis.legend() + axis.set_xlim(self._trace.x_min, self._trace.x_max) + + self.save_plot(fig, filepath) + return axis + + def _get_cdf(self, data, threshold): + """ + Build the "Cumulative Distribution Function" (CDF) for the given data """ - if not self._trace.hasEvents('sched_switch'): - self._log.warning('Event [sched_switch] not found, ' - 'plot DISABLED!') - return - if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Event [sched_wakeup] not found, ' - 'plot DISABLED!') - return - - # Get task data - td = self._get_task_data(task) - if not td: - return None - - # Load wakeup latencies (if required) + # Build the series of sorted values + ser = data.sort_values() + df = pd.Series(np.linspace(0., 1., len(ser)), index=ser) + + # Compute percentage of samples above/below the specified threshold + below = float(max(df[:threshold])) + above = 1 - below + return df, above, below + + @requires_events(df_latency_wakeup.required_events) + def _get_latencies_df(self, task, wakeup, preempt): wkp_df = None - if 'all' in kind or 'wakeup' in kind: - wkp_df = self.df_latency_wakeup(td.pid) - if wkp_df is not None: + prt_df = None + + if wakeup: + wkp_df = self.df_latency_wakeup(task) wkp_df.rename(columns={'wakeup_latency' : 'latency'}, inplace=True) - self._log.info('Found: %5d WAKEUP latencies', len(wkp_df)) - # Load preempt latencies (if required) - prt_df = None - if 'all' in kind or 'preempt' 
in kind:
- prt_df = self.df_latency_preemption(td.pid)
- if prt_df is not None:
+ if preempt:
+ prt_df = self.df_latency_preemption(task)
 prt_df.rename(columns={'preempt_latency' : 'latency'}, inplace=True)
- self._log.info('Found: %5d PREEMPT latencies', len(prt_df))
-
- if wkp_df is None and prt_df is None:
- self._log.warning('No Latency info for task [%s]', td.label)
- return
- # Join the two data frames
- df = wkp_df.append(prt_df)
- cdf = self._get_cdf(df.latency, (threshold_ms / 1000.))
+ if wakeup and preempt:
+ df = wkp_df.append(prt_df)
+ else:
+ df = wkp_df if wkp_df is not None else prt_df
- return df, cdf
+ return df
- @memoized
- def df_latency_stats(self, task, kind='all', threshold_ms=1):
+ @requires_events(_get_latencies_df.required_events)
+ def plot_latencies_cdf(self, task, wakeup=True, preempt=True,
+ threshold_ms=1, filepath=None):
 """
- Compute statistics on latencies of the specified task.
+ Plot the latencies Cumulative Distribution Function of a task
+
+ :param task: The task's name or PID
+ :type task: int or str
- :param task: the task to report latencies for
- :type task: int or list(str)
+ :param wakeup: Whether to plot wakeup latencies
+ :type wakeup: bool
- :param kind: the kind of latencies to report (WAKEUP and/or PREEMPT")
- :type kind: str
+ :param preempt: Whether to plot preemption latencies
+ :type preempt: bool
- :param threshold_ms: the minimum acceptable [ms] value to report
- graphically in the generated plots
+ :param threshold_ms: The latency threshold to plot
 :type threshold_ms: int or float
-
- :returns: a DataFrame with statistics on task latencies
 """
- # Get latency events
- df, cdf = self._get_latency_df(task, kind, threshold_ms)
+ fig, axis = self.setup_plot()
- # Return statistics
- stats_df = df.describe(percentiles=[0.95, 0.99])
- label = '{:.1f}%'.format(100. * cdf.below)
- stats = { label : cdf.threshold }
- return stats_df.append(pd.DataFrame(
- list(stats.values()), columns=['latency'], index=list(stats.keys())))
+ df = self._get_latencies_df(task, wakeup, preempt)
+ threshold_s = threshold_ms / 1e3
+ cdf_df, above, below = self._get_cdf(df.latency, threshold_s)
+ cdf_df.plot(ax=axis, xlim=(0, None), label="CDF")
+ axis.axhline(below, linestyle='--', color=self.LATENCY_THRESHOLD_COLOR,
+ label="Latencies below {}ms".format(threshold_ms))
+ axis.axvspan(0, threshold_s, facecolor=self.LATENCY_THRESHOLD_ZONE_COLOR,
+ alpha=0.5, label="{}ms threshold zone".format(threshold_ms))
-###############################################################################
-# Plotting Methods
-###############################################################################
+ axis.set_title("Latencies CDF of task \"{}\"".format(task))
+ axis.set_xlabel("Latency (s)")
+ axis.set_ylabel("Latencies below the x value (fraction)")
+ axis.legend()
- def plot_latency(self, task, kind='all', tag=None, threshold_ms=1, bins=64):
+ self.save_plot(fig, filepath)
+ return axis
+
+ @requires_events(_get_latencies_df.required_events)
+ def plot_latencies_histogram(self, task, wakeup=True, preempt=True,
+ threshold_ms=1, bins=64, filepath=None):
 """
- Generate a set of plots to report the WAKEUP and PREEMPT latencies the
- specified task has been subject to. A WAKEUP latencies is the time from
- when a task becomes RUNNABLE till the first time it gets a CPU.
- A PREEMPT latencies is the time from when a RUNNING task is suspended
- because of the CPU is assigned to another task till when the task
- enters the CPU again.
+ Plot the latencies histogram of a task - :param task: the task to report latencies for - :type task: int or list(str) + :param task: The task's name or PID + :type task: int or str - :param kind: the kind of latencies to report (WAKEUP and/or PREEMPT") - :type kind: str + :param wakeup: Whether to plot wakeup latencies + :type wakeup: bool - :param tag: a string to add to the plot title - :type tag: str + :param preempt: Whether to plot preemption latencies + :type preempt: bool - :param threshold_ms: the minimum acceptable [ms] value to report - graphically in the generated plots + :param threshold_ms: The latency threshold to plot :type threshold_ms: int or float - - :param bins: number of bins to be used for the runtime's histogram - :type bins: int - - :returns: a DataFrame with statistics on ploted latencies """ + fig, axis= self.setup_plot() - # Get latency events - df, cdf = self._get_latency_df(task, kind, threshold_ms) - self._log.info('Total: %5d latency events', len(df)) - self._log.info('%.1f %% samples below %d [ms] threshold', - 100. * cdf.below, threshold_ms) - - # Get task data - td = self._get_task_data(task) - if not td: - return None - - # Setup plots - gs = gridspec.GridSpec(2, 2, height_ratios=[2,1], width_ratios=[1,1]) - plt.figure(figsize=(16, 8)) - - plot_title = "[{}]: {} latencies".format(td.label, kind.upper()) - if tag: - plot_title = "{} [{}]".format(plot_title, tag) - plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms) - - # Latency events duration over time - axes = plt.subplot(gs[0,0:2]) - axes.set_title(plot_title) - try: - wkp_df.rename(columns={'latency': 'wakeup'}, inplace=True) - wkp_df.plot(style='b+', logy=True, ax=axes) - except Exception: - pass - try: - prt_df.rename(columns={'latency' : 'preempt'}, inplace=True) - prt_df.plot(style='r+', logy=True, ax=axes) - except Exception: - pass - axes.axhline(threshold_ms / 1000., linestyle='--', color='g') - self._trace.analysis.status.plot_overutilized(axes) - axes.legend(loc='lower center', ncol=2) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - - # Cumulative distribution of latencies samples - axes = plt.subplot(gs[1,0]) - cdf.df.plot(ax=axes, legend=False, xlim=(0,None), - title='Latencies CDF ({:.1f}% within {} [ms] threshold)'\ - .format(100. * cdf.below, threshold_ms)) - axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5); - axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--') - - # Histogram of all latencies - axes = plt.subplot(gs[1,1]) - ymax = 1.1 * df.latency.max() - df.latency.plot(kind='hist', bins=bins, ax=axes, - xlim=(0,ymax), legend=False, - title='Latency histogram ({} bins, {} [ms] green threshold)'\ - .format(bins, threshold_ms)); - axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5); - - # Save generated plots into datadir - task_name = re.sub('[\ :/]', '_', td.label) - figname = '{}/{}task_latencies_{}_{}.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix, - td.pid, task_name) - pl.savefig(figname, bbox_inches='tight') - - - def plot_latency_bands(self, task, axes=None): - """ - Draw a plot that shows intervals of time when the execution of a - RUNNABLE task has been delayed. The plot reports: - WAKEUP lantecies as RED colored bands - PREEMPTION lantecies as BLUE colored bands + df = self._get_latencies_df(task, wakeup, preempt) + threshold_s = threshold_ms / 1e3 - The optional axes parameter allows to plot the signal on an existing - graph. 
+ df.latency.plot.hist(bins=bins, ax=axis, xlim=(0, 1.1 * df.latency.max())) + axis.axvspan(0, threshold_s, facecolor=self.LATENCY_THRESHOLD_ZONE_COLOR, alpha=0.5, + label="{}ms threshold zone".format(threshold_ms)); - :param task: the task to report latencies for - :type task: str + axis.set_title("Latencies histogram of task \"{}\"".format(task)) + axis.set_xlabel("Latency (s)") + axis.legend() - :param axes: axes on which to plot the signal - :type axes: :mod:`matplotlib.axes.Axes` - """ - if not self._trace.hasEvents('sched_switch'): - self._log.warning('Event [sched_switch] not found, ' - 'plot DISABLED!') - return - if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Event [sched_wakeup] not found, ' - 'plot DISABLED!') - return - - # Get task PID - td = self._get_task_data(task) - if not td: - return None - - wkl_df = self.df_latency_wakeup(td.pid) - prt_df = self.df_latency_preemption(td.pid) - - if wkl_df is None and prt_df is None: - self._log.warning('No task with name [%s]', td.label) - return - - # If not axis provided: generate a standalone plot - if not axes: - gs = gridspec.GridSpec(1, 1) - plt.figure(figsize=(16, 2)) - axes = plt.subplot(gs[0, 0]) - axes.set_title('Latencies on [{}] ' - '(red: WAKEUP, blue: PREEMPT)'\ - .format(td.label)) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.set_yticklabels([]) - axes.set_xlabel('Time [s]') - axes.grid(True) - - # Draw WAKEUP latencies - try: - bands = [(t, wkl_df['wakeup_latency'][t]) for t in wkl_df.index] - for (start, duration) in bands: - end = start + duration - axes.axvspan(start, end, facecolor='r', alpha=0.1) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - except Exception: - pass - - # Draw PREEMPTION latencies - try: - bands = [(t, prt_df['preempt_latency'][t]) for t in prt_df.index] - for (start, duration) in bands: - end = start + duration - axes.axvspan(start, end, facecolor='b', alpha=0.1) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - except Exception: - pass + self.save_plot(fig, filepath) + return axis - def plot_activations(self, task, tag=None, threshold_ms=16, bins=64): + @requires_events(df_latency_wakeup.required_events) + def plot_latency_bands(self, task, filepath=None, axis=None): """ - Plots "activation intervals" for the specified task - - An "activation interval" is time incurring between two consecutive - wakeups of a task. A set of plots is generated to report: - - Activations interval at wakeup time: every time a task wakeups a - point is plotted to represent the time interval since the previous - wakeup. - - Activations interval cumulative function: reports the cumulative - function of the activation intervals. - - Activations intervals histogram: reports a 64 bins histogram of - the activation intervals. - - All plots are parameterized based on the value of threshold_ms, which - can be used to filter activations intervals bigger than 2 times this - value. - Such a threshold is useful to filter out from the plots outliers thus - focusing the analysis in the most critical periodicity under analysis. - The number and percentage of discarded samples is reported in output. - A default threshold of 16 [ms] is used, which is useful for example - to analyze a 60Hz rendering pipelines. - - A PNG of the generated plots is generated and saved in the same folder - where the trace is. 
- - :param task: the task to report latencies for - :type task: int or list(str) - - :param tag: a string to add to the plot title - :type tag: str - - :param threshold_ms: the minimum acceptable [ms] value to report - graphically in the generated plots - :type threshold_ms: int or float + Draw the task wakeup/preemption latencies as colored bands - :param bins: number of bins to be used for the runtime's histogram - :type bins: int + :param task: The task's name or PID + :type task: int or str - :returns: a DataFrame with statistics on ploted activation intervals + :param axis: If provided, overlay the bands on this axis + :type axis: matplotlib.axes.Axes """ + local_fig = axis is None - if not self._trace.hasEvents('sched_switch'): - self._log.warning('Event [sched_switch] not found, ' - 'plot DISABLED!') - return - if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Event [sched_wakeup] not found, ' - 'plot DISABLED!') - return - - # Get task data - td = self._get_task_data(task) - if not td: - return None - - # Load activation data - wkp_df = self.df_activations(td.pid) - if wkp_df is None: - return None - self._log.info('Found: %5d activations for [%s]', - len(wkp_df), td.label) - - # Disregard data above two time the specified threshold - y_max = (2 * threshold_ms) / 1000. - len_tot = len(wkp_df) - wkp_df = wkp_df[wkp_df.activation_interval <= y_max] - len_plt = len(wkp_df) - if len_plt < len_tot: - len_dif = len_tot - len_plt - len_pct = 100. * len_dif / len_tot - self._log.warning('Discarding {} activation intervals (above 2 x threshold_ms, ' - '{:.1f}% of the overall activations)'\ - .format(len_dif, len_pct)) - ymax = 1.1 * wkp_df.activation_interval.max() - - # Build the series for the CDF - cdf = self._get_cdf(wkp_df.activation_interval, (threshold_ms / 1000.)) - self._log.info('%.1f %% samples below %d [ms] threshold', - 100. * cdf.below, threshold_ms) - - # Setup plots - gs = gridspec.GridSpec(2, 2, height_ratios=[2,1], width_ratios=[1,1]) - plt.figure(figsize=(16, 8)) - - plot_title = "[{}]: activaton intervals (@ wakeup time)".format(td.label) - if tag: - plot_title = "{} [{}]".format(plot_title, tag) - plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms) - - # Activations intervals over time - axes = plt.subplot(gs[0,0:2]) - axes.set_title(plot_title) - wkp_df.plot(style='g+', logy=False, ax=axes) - - axes.axhline(threshold_ms / 1000., linestyle='--', color='g') - self._trace.analysis.status.plot_overutilized(axes) - axes.legend(loc='lower center', ncol=2) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - - # Cumulative distribution of all activations intervals - axes = plt.subplot(gs[1,0]) - cdf.df.plot(ax=axes, legend=False, xlim=(0,None), - title='Activations CDF ({:.1f}% within {} [ms] threshold)'\ - .format(100. 
* cdf.below, threshold_ms)) - axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5); - axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--') - - # Histogram of all activations intervals - axes = plt.subplot(gs[1,1]) - wkp_df.plot(kind='hist', bins=bins, ax=axes, - xlim=(0,ymax), legend=False, - title='Activation intervals histogram ({} bins, {} [ms] green threshold)'\ - .format(bins, threshold_ms)); - axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5); - - # Save generated plots into datadir - task_name = re.sub('[\ :/]', '_', td.label) - figname = '{}/{}task_activations_{}_{}.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix, - td.pid, task_name) - pl.savefig(figname, bbox_inches='tight') - - # Return statistics - stats_df = wkp_df.describe(percentiles=[0.95, 0.99]) - label = '{:.1f}%'.format(100. * cdf.below) - stats = { label : cdf.threshold } - return stats_df.append(pd.DataFrame( - list(stats.values()), columns=['activation_interval'], index=list(stats.keys()))) - - - def plot_runtimes(self, task, tag=None, threshold_ms=8, bins=64): - """ - Plots "running times" for the specified task - - A "running time" is the sum of all the time intervals a task executed - in between a wakeup and the next sleep (or exit). - A set of plots is generated to report: - - Running times at block time: every time a task blocks a - point is plotted to represent the cumulative time the task has be - running since its last wakeup - - Running time cumulative function: reports the cumulative - function of the running times. - - Running times histogram: reports a 64 bins histogram of - the running times. - - All plots are parameterized based on the value of threshold_ms, which - can be used to filter running times bigger than 2 times this value. - Such a threshold is useful to filter out from the plots outliers thus - focusing the analysis in the most critical periodicity under analysis. - The number and percentage of discarded samples is reported in output. - A default threshold of 16 [ms] is used, which is useful for example to - analyze a 60Hz rendering pipelines. - - A PNG of the generated plots is generated and saved in the same folder - where the trace is. 
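+ # Note on the local_fig convention used below (and throughout this
+ # module): when no axis is provided we create and save a standalone
+ # figure; when the caller passes one in, we only draw on it, so several
+ # plots can be overlaid on the same axis.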
- - :param task: the task to report latencies for - :type task: int or list(str) - - :param tag: a string to add to the plot title - :type tag: str - - :param threshold_ms: the minimum acceptable [ms] value to report - graphically in the generated plots - :type threshold_ms: int or float + if local_fig: + fig, axis = self.setup_plot() - :param bins: number of bins to be used for the runtime's histogram - :type bins: int + wkl_df = self.df_latency_wakeup(task) + prt_df = self.df_latency_preemption(task) - :returns: a DataFrame with statistics on ploted running times - """ + def plot_bands(df, column, label): + bands = [(t, df[column][t]) for t in df.index] + color = self.get_next_color(axis) + for idx, (start, duration) in enumerate(bands): + if idx > 0: + label = None + + end = start + duration + axis.axvspan(start, end, facecolor=color, alpha=0.5, + label=label) + + plot_bands(wkl_df, "wakeup_latency", "Wakeup latencies") + plot_bands(prt_df, "preempt_latency", "Preemption latencies") + axis.legend() + axis.set_xlim(self._trace.x_min, self._trace.x_max) + + if local_fig: + self.save_plot(fig, filepath) + + return axis - if not self._trace.hasEvents('sched_switch'): - self._log.warning('Event [sched_switch] not found, ' - 'plot DISABLED!') - return - if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Event [sched_wakeup] not found, ' - 'plot DISABLED!') - return - - # Get task data - td = self._get_task_data(task) - if not td: - return None - - # Load runtime data - run_df = self.df_runtimes(td.pid) - if run_df is None: - return None - self._log.info('Found: %5d activations for [%s]', - len(run_df), td.label) - - # Disregard data above two time the specified threshold - y_max = (2 * threshold_ms) / 1000. - len_tot = len(run_df) - run_df = run_df[run_df.running_time <= y_max] - len_plt = len(run_df) - if len_plt < len_tot: - len_dif = len_tot - len_plt - len_pct = 100. * len_dif / len_tot - self._log.warning('Discarding {} running times (above 2 x threshold_ms, ' - '{:.1f}% of the overall activations)'\ - .format(len_dif, len_pct)) - ymax = 1.1 * run_df.running_time.max() - - # Build the series for the CDF - cdf = self._get_cdf(run_df.running_time, (threshold_ms / 1000.)) - self._log.info('%.1f %% samples below %d [ms] threshold', - 100. * cdf.below, threshold_ms) - - # Setup plots - gs = gridspec.GridSpec(2, 2, height_ratios=[2,1], width_ratios=[1,1]) - plt.figure(figsize=(16, 8)) - - plot_title = "[{}]: running times (@ block time)".format(td.label) - if tag: - plot_title = "{} [{}]".format(plot_title, tag) - plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms) - - # Running time over time - axes = plt.subplot(gs[0,0:2]) - axes.set_title(plot_title) - run_df.plot(style='g+', logy=False, ax=axes) - - axes.axhline(threshold_ms / 1000., linestyle='--', color='g') - self._trace.analysis.status.plot_overutilized(axes) - axes.legend(loc='lower center', ncol=2) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - - # Cumulative distribution of all running times - axes = plt.subplot(gs[1,0]) - cdf.df.plot(ax=axes, legend=False, xlim=(0,None), - title='Runtime CDF ({:.1f}% within {} [ms] threshold)'\ - .format(100. 
* cdf.below, threshold_ms)) - axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5); - axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--') - - # Histogram of all running times - axes = plt.subplot(gs[1,1]) - run_df.plot(kind='hist', bins=bins, ax=axes, - xlim=(0,ymax), legend=False, - title='Latency histogram ({} bins, {} [ms] green threshold)'\ - .format(bins, threshold_ms)); - axes.axvspan(0, threshold_ms / 1000., facecolor='g', alpha=0.5); - - # Save generated plots into datadir - task_name = re.sub('[\ :/]', '_', td.label) - figname = '{}/{}task_runtimes_{}_{}.png'\ - .format(self._trace.plots_dir, self._trace.plots_prefix, - td.pid, task_name) - pl.savefig(figname, bbox_inches='tight') - - # Return statistics - stats_df = run_df.describe(percentiles=[0.95, 0.99]) - label = '{:.1f}%'.format(100. * cdf.below) - stats = { label : cdf.threshold } - return stats_df.append(pd.DataFrame( - list(stats.values()), columns=['running_time'], index=list(stats.keys()))) - - def plot_task_residency(self, task): + @requires_events(df_activations.required_events) + def plot_activations(self, task, filepath=None): """ - Plot CPU residency of the specified task - This will show an overview of how much time that task spent being - active on each available CPU, in seconds. + Plot the :meth:`lisa.analysis.latency.LatencyAnalysis.df_activations` of a task - :param task: the task to report runtimes for + :param task: The task's name or PID :type task: int or str """ - df = self.df_task_residency(task) + fig, axis = self.setup_plot() - ax = df.plot(kind='bar', figsize=(16, 6)) - ax.set_title('CPU residency of task {}'.format(task)) + wkp_df = self.df_activations(task) - figname = os.path.join( - self._trace.plots_dir, - '{}task_cpu_residency_{}.png'.format( - self._trace.plots_prefix, task - ) - ) + wkp_df.plot(style='+', logy=False, ax=axis) - pl.savefig(figname, bbox_inches='tight') + plot_overutilized = self._trace.analysis.status.plot_overutilized + if self._trace.hasEvents(plot_overutilized.required_events): + plot_overutilized(axis=axis) -############################################################################### -# Utility Methods -############################################################################### - - @memoized - def _get_task_data(self, task): - - # Get task PID - if isinstance(task, str): - task_pids = self._trace.getTaskByName(task) - if len(task_pids) == 0: - self._log.warning('No tasks found with name [%s]', task) - return None - - task_pid = task_pids[0] - if len(task_pids) > 1: - self._log.warning('Multiple PIDs for task named [%s]', task) - for pid in task_pids: - self._log.warning(' %5d : %s', pid, - ','.join(self._trace.getTaskByPid(pid))) - self._log.warning('Returning stats only for PID: %d', - task_pid) - task_name = self._trace.getTaskByPid(task_pid) - - # Get task name - elif isinstance(task, int): - task_pid = task - task_name = self._trace.getTaskByPid(task_pid) - if task_name is None: - self._log.warning('No tasks found with name [%s]', task) - return None + axis.set_title("Activation intervals of task \"{}\"".format(task)) - else: - raise ValueError("Task must be either an int or str") - - task_label = "{}: {}".format(task_pid, task_name) - return TaskData(task_pid, task_name, task_label) - - @memoized - def _task_state(self, state): - try: - state = int(state) - except ValueError: - # State already converted to symbol - return state - - # Tasks STATE flags (Linux 3.18) - TASK_STATES = { - 0: "R", # TASK_RUNNING - 1: "S", # TASK_INTERRUPTIBLE 
- 2: "D", # TASK_UNINTERRUPTIBLE - 4: "T", # __TASK_STOPPED - 8: "t", # __TASK_TRACED - 16: "X", # EXIT_DEAD - 32: "Z", # EXIT_ZOMBIE - 64: "x", # TASK_DEAD - 128: "K", # TASK_WAKEKILL - 256: "W", # TASK_WAKING - 512: "P", # TASK_PARKED - 1024: "N", # TASK_NOLOAD - } - try: - kernel_version = self._trace.plat_info['kernel-version'] - except KeyError: - self._log.info('Parsing task states assuming 3.18 kernel') - kernel_version = KernelVersion('3.18') - - if kernel_version.parts >= (4, 8): - TASK_STATES[2048] = "n" # TASK_NEW - TASK_MAX_STATE = 2 * max(TASK_STATES) - - res = "R" - if state & (TASK_MAX_STATE - 1) != 0: - res = "" - for key in list(TASK_STATES.keys()): - if key & state: - res += TASK_STATES[key] - if state & TASK_MAX_STATE: - res += "+" - else: - res = '|'.join(res) - return res + axis.set_xlim(self._trace.x_min, self._trace.x_max) + self.save_plot(fig, filepath) + return axis - def _get_cdf(self, data, threshold): + @requires_events(df_runtimes.required_events) + def plot_runtimes(self, task, filepath=None): """ - Build the "Cumulative Distribution Function" (CDF) for the given data + Plot the :meth:`lisa.analysis.latency.LatencyAnalysis.df_runtimes` of a task + + :param task: The task's name or PID + :type task: int or str """ + fig, axis = self.setup_plot() - # Build the series of sorted values - ser = data.sort_values() - if len(ser) < 1000: - # Append again the last (and largest) value. - # This step is important especially for small sample sizes - # in order to get an unbiased CDF - ser = ser.append(pd.Series(ser.iloc[-1])) - df = pd.Series(np.linspace(0., 1., len(ser)), index=ser) + df = self.df_runtimes(task) - # Compute percentage of samples above/below the specified threshold - below = float(max(df[:threshold])) - above = 1 - below - return CDF(df, threshold, above, below) + df.plot(style='+', ax=axis) + + plot_overutilized = self._trace.analysis.status.plot_overutilized + if self._trace.hasEvents(plot_overutilized.required_events): + plot_overutilized(axis=axis) + + axis.set_title("Per-activation runtimes of task \"{}\"".format(task)) + + axis.set_xlim(self._trace.x_min, self._trace.x_max) + self.save_plot(fig, filepath) + return axis # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/lisa/analysis/load_tracking.py b/lisa/analysis/load_tracking.py new file mode 100644 index 0000000000000000000000000000000000000000..6e2f59b541b9122912e3162f057524fb54ae2470 --- /dev/null +++ b/lisa/analysis/load_tracking.py @@ -0,0 +1,336 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (C) 2018, Arm Limited and contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+""" Scheduler load tracking analysis module """
+
+import pandas as pd
+
+from lisa.analysis.base import AnalysisBase
+
+
+class LoadTrackingAnalysis(AnalysisBase):
+ """
+ Support for scheduler load tracking analysis
+
+ :param trace: input Trace object
+ :type trace: lisa.trace.Trace
+ """
+
+ name = 'load_tracking'
+
+ def __init__(self, trace):
+ super().__init__(trace)
+
+ @classmethod
+ def _columns_renaming(cls, event):
+ """
+ Columns to rename to unify dataframes between trace event versions
+ """
+ if event in ['sched_load_avg_cpu', 'sched_load_avg_task']:
+ return {
+ "util_avg" : "util",
+ "load_avg" : "load"
+ }
+
+ return {}
+
+ @classmethod
+ def _columns_to_drop(cls, event):
+ """
+ The extra columns not shared between trace event versions
+ """
+ if event in ['sched_load_cfs_rq', 'sched_load_se']:
+ return ['path', 'rbl_load', 'cpu']
+
+ if event in ['sched_load_avg_task']:
+ return ['load_sum', 'period_contrib', 'util_sum']
+
+ return []
+
+ def _df_uniformized_signal(self, event):
+ df = self._trace.df_events(event)
+
+ df = df.rename(columns=self._columns_renaming(event))
+
+ if event == 'sched_load_se':
+ df = df[df.path == "(null)"]
+
+ if event == 'sched_load_cfs_rq':
+ df = df[df.path == "/"]
+
+ to_drop = self._columns_to_drop(event)
+ df = df[[col for col in df.columns if col not in to_drop]]
+
+ return df
+
+ def _df_either_event(self, events):
+ for event in events:
+ if event not in self._trace.available_events:
+ continue
+
+ return self._df_uniformized_signal(event)
+
+ raise RuntimeError("Trace is missing all of the following alternative events: {}".format(events))
+
+ def df_cpus_signals(self):
+ """
+ Get the load-tracking signals for the CPUs
+
+ :returns: a :class:`pandas.DataFrame` with:
+
+ * A ``util`` column (the average utilization of a CPU at time t)
+ * A ``load`` column (the average load of a CPU at time t)
+
+ :Required events:
+ Either of:
+
+ * ``sched_load_cfs_rq``
+ * ``sched_load_avg_cpu``
+ """
+ return self._df_either_event(['sched_load_cfs_rq', 'sched_load_avg_cpu'])
+
+ def df_tasks_signals(self):
+ """
+ Get the load-tracking signals for the tasks
+
+ :returns: a :class:`pandas.DataFrame` with:
+
+ * A ``util`` column (the average utilization of a task at time t)
+ * A ``load`` column (the average load of a task at time t)
+
+ If CPU capacity information is available:
+
+ * A ``required_capacity`` column (the minimum available CPU capacity
+ required to run this task without being CPU-bound)
+
+ :Required events:
+ Either of:
+
+ * ``sched_load_se``
+ * ``sched_load_avg_task``
+ """
+ df = self._df_either_event(['sched_load_se', 'sched_load_avg_task'])
+
+ if "cpu-capacities" in self._trace.plat_info:
+ # Add a column which represents the max capacity of the smallest
+ # CPU which can accommodate the task utilization
+ capacities = sorted(self._trace.plat_info["cpu-capacities"].values())
+
+ def fits_capacity(util):
+ for capacity in capacities:
+ if util <= capacity:
+ return capacity
+
+ return capacities[-1]
+
+ df["required_capacity"] = df.util.map(fits_capacity)
+
+ return df
+
+ def df_top_big_tasks(self, util_threshold, min_samples=100):
+ """
+ Tasks which had 'utilization' samples bigger than the specified
+ threshold
+
+ :param util_threshold: the utilization value above which samples are
+ counted
+ :type util_threshold: int
+
+ :param min_samples: minimum number of samples above ``util_threshold``
+ required for a task to be reported
+ :type min_samples: int
+
+ :returns: a :class:`pandas.DataFrame` with:
+
+ * Task PIDs as index
+ * A 
``samples`` column (The number of util samples above the threshold) + """ + df = self.df_tasks_signals() + + # Compute number of samples above threshold + samples = df[df.util > util_threshold].groupby('pid').count()["util"] + samples = samples[samples > min_samples] + samples = samples.sort_values(ascending=False) + + top_df = pd.DataFrame(samples).rename(columns={"util" : "samples"}) + top_df["comm"] = top_df.index.map(self._trace.get_task_by_pid) + + return top_df + + def plot_cpus_signals(self, cpus=None, filepath=None): + """ + Plot the CPU-related load-tracking signals + + :param cpus: list of CPUs to be plotted + :type cpus: list(int) + """ + cpus = cpus or list(range(self._trace.cpus_count)) + fig, axes = self.setup_plot(nrows=len(cpus), sharex=True) + + cpus_df = self.df_cpus_signals() + + for idx, cpu in enumerate(cpus): + axis = axes[cpu] if len(cpus) > 1 else axes + + # Add CPU utilization + axis.set_title('CPU{}'.format(cpu)) + df = cpus_df[cpus_df["__cpu"] == cpu] + + df[['util']].plot(ax=axis, drawstyle='steps-post', alpha=0.4) + df[['load']].plot(ax=axis, drawstyle='steps-post', alpha=0.4) + + self._trace.analysis.cpus.plot_orig_capacity(axis, cpu) + + # Add capacities data if available + if self._trace.hasEvents('cpu_capacity'): + df = self._trace.df_events('cpu_capacity') + df = df[df["__cpu"] == cpu] + if len(df): + data = df[['capacity', 'tip_capacity']] + data.plot(ax=axis, style=['m', '--y'], + drawstyle='steps-post') + + # Add overutilized signal to the plot + plot_overutilized = self._trace.analysis.status.plot_overutilized + if self._trace.hasEvents(plot_overutilized.required_events): + plot_overutilized(axis=axis) + + axis.set_ylim(0, 1100) + axis.set_xlim(self._trace.x_min, self._trace.x_max) + axis.legend() + + self.save_plot(fig, filepath) + return axes + + def plot_task_signals(self, task, filepath=None): + """ + Plot the task-related load-tracking signals + + :param task: The name or PID of the task + :type task: str or int + """ + fig, axis = self.setup_plot() + + df = self.df_tasks_signals() + + pid = self._trace.get_task_pid(task) + df = df[df.pid == pid] + + df[['util']].plot(ax=axis, drawstyle='steps-post', alpha=0.4) + df[['load']].plot(ax=axis, drawstyle='steps-post', alpha=0.4) + + plot_overutilized = self._trace.analysis.status.plot_overutilized + if self._trace.hasEvents(plot_overutilized.required_events): + plot_overutilized(axis=axis) + + axis.set_title('Load-tracking signals of task "{}"'.format(task)) + axis.legend() + axis.grid(True) + axis.set_xlim(self._trace.x_min, self._trace.x_max) + + self.save_plot(fig, filepath) + return axis + + def plot_task_required_capacity(self, task, filepath=None, axis=None): + """ + Plot the minimum required capacity of a task + + :param task: The name or PID of the task + :type task: str or int + + :param axis: If provided, overlay the required capacity on this axis + :type axis: matplotlib.axes.Axes + """ + local_fig = axis is None + + if local_fig: + fig, axis = self.setup_plot(height=8) + + pid = self._trace.get_task_pid(task) + + df = self.df_tasks_signals() + df = df[df.pid == pid] + + # Build task names (there could be multiple, during the task lifetime) + task_name = 'Task ({}:{})'.format(pid, self._trace.get_task_by_pid(pid)) + + df["required_capacity"].plot( + drawstyle='steps-post', + ax=axis) + + axis.legend() + axis.grid(True) + + if local_fig: + axis.set_title(task_name) + axis.set_ylim(0, 1100) + axis.set_xlim(self._trace.x_min, self._trace.x_max) + axis.set_ylabel('Utilization') + 
axis.set_xlabel('Time (s)') + + self.save_plot(fig, filepath) + + return axis + + def plot_task_placement(self, task, filepath=None): + """ + Plot the CPU placement of the task + + :param task: The name or PID of the task + :type task: str or int + """ + fig, axis = self.setup_plot() + + # Get all utilization update events + df = self.df_tasks_signals() + + pid = self._trace.get_task_pid(task) + df = df[df.pid == pid] + + cpu_capacities = self._trace.plat_info["cpu-capacities"] + + def evaluate_placement(cpu, required_capacity): + capacity = cpu_capacities[cpu] + + if capacity < required_capacity: + return "CPU capacity < required capacity" + elif capacity == required_capacity: + return "CPU capacity == required capacity" + else: + return "CPU capacity > required capacity" + + df["placement"] = df.apply( + lambda row: evaluate_placement( + row["__cpu"], + row["required_capacity"]), axis=1) + + for stat in df["placement"].unique(): + df[df.placement == stat]["__cpu"].plot(ax=axis, style="+", label=stat) + + plot_overutilized = self._trace.analysis.status.plot_overutilized + if self._trace.hasEvents(plot_overutilized.required_events): + plot_overutilized(axis=axis) + + axis.set_title("Utilization vs placement of task \"{}\"".format(task)) + + axis.set_xlim(self._trace.x_min, self._trace.x_max) + axis.grid(True) + axis.legend() + + self.save_plot(fig, filepath) + + return axis diff --git a/lisa/analysis/status.py b/lisa/analysis/status.py index 3257ee396f486bfeb7a6394a63f6bf9af6addfe8..858b28e267f60f6a49ea9c967bf32f05e1ce9972 100644 --- a/lisa/analysis/status.py +++ b/lisa/analysis/status.py @@ -19,11 +19,7 @@ """ System Status Analaysis Module """ -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt - -from lisa.analysis.base import AnalysisBase - +from lisa.analysis.base import AnalysisBase, requires_events class StatusAnalysis(AnalysisBase): """ @@ -43,13 +39,16 @@ class StatusAnalysis(AnalysisBase): # DataFrame Getter Methods ############################################################################### + @requires_events(["sched_overutilized"]) def df_overutilized(self): """ - Get data frame with sched_overutilized data. - """ - if not self._trace.hasEvents('sched_overutilized'): - return None + Get overutilized events + :returns: A :class:`pandas.DataFrame` with: + + * A ``overutilized`` column (the overutilized status at a given time) + * A ``len`` column (the time spent in that overutilized status) + """ # Build sequence of overutilization "bands" df = self._trace.df_events('sched_overutilized') @@ -61,7 +60,6 @@ class StatusAnalysis(AnalysisBase): # df = df.reset_index()\ # .drop_duplicates(subset='Time', keep='last')\ # .set_index('Time') - return df[['len', 'overutilized']] @@ -69,44 +67,41 @@ class StatusAnalysis(AnalysisBase): # Plotting Methods ############################################################################### - def plot_overutilized(self, axes=None): + @requires_events(df_overutilized.required_events) + def plot_overutilized(self, filepath=None, axis=None): """ - Draw a plot that shows intervals of time where the system was reported - as overutilized. + Draw the system's overutilized status as colored bands - The optional axes parameter allows to plot the signal on an existing - graph. 
- - :param axes: axes on which to plot the signal - :type axes: :mod:`matplotlib.axes.Axes` + :param axis: If provided, overlay the bands on this axis + :type axis: matplotlib.axes.Axes """ - if not self._trace.hasEvents('sched_overutilized'): - self._log.warning('Event [sched_overutilized] not found, ' - 'plot DISABLED!') - return + local_fig = axis is None + + if local_fig: + fig, axis = self.setup_plot() df = self.df_overutilized() # Compute intervals in which the system is reported to be overutilized bands = [(t, df['len'][t], df['overutilized'][t]) for t in df.index] - # If not axis provided: generate a standalone plot - if not axes: - gs = gridspec.GridSpec(1, 1) - plt.figure(figsize=(16, 1)) - axes = plt.subplot(gs[0, 0]) - axes.set_title('System Status {white: EAS mode, ' - 'red: Non EAS mode}') - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.set_yticklabels([]) - axes.set_xlabel('Time [s]') - axes.grid(True) - - # Otherwise: draw overutilized bands on top of the specified plot + color = self.get_next_color(axis) + label = "Overutilized" for (start, delta, overutilized) in bands: if not overutilized: continue + end = start + delta - axes.axvspan(start, end, facecolor='r', alpha=0.1) + axis.axvspan(start, end, alpha=0.2, facecolor=color, label=label) + + if label: + label = None + + axis.legend() + + if local_fig: + axis.set_title("System-wide overutilized status") + axis.set_xlim(self._trace.x_min, self._trace.x_max) + self.save_plot(fig, filepath) # vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/lisa/analysis/tasks.py b/lisa/analysis/tasks.py index 3a7cd79d44fea81d6668f263ce210778c2ce15f3..fd04080d9a37f0fc69e2e4ff57c9d15cde84595c 100644 --- a/lisa/analysis/tasks.py +++ b/lisa/analysis/tasks.py @@ -15,19 +15,98 @@ # limitations under the License. 
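The ``@requires_events(df_overutilized.required_events)`` chaining seen above is the idiom this refactoring relies on: a plot method declares exactly the events of the dataframe accessor it wraps, and the Sphinx hook documents them automatically. A minimal sketch of a new analysis module following the same convention (the class and its event list are illustrative only)::

    from lisa.analysis.base import AnalysisBase, requires_events

    class ExampleAnalysis(AnalysisBase):

        name = 'example'

        @requires_events(['sched_switch'])
        def df_switches(self):
            # The decorator runs self.check_events() first, raising a
            # MissingTraceEventError (defined in lisa.analysis.base)
            # when 'sched_switch' was not traced
            return self._trace.df_events('sched_switch')

        # Reusing required_events keeps the plot method's dependency
        # list (and its generated documentation) in sync with df_switches
        @requires_events(df_switches.required_events)
        def plot_switches(self, filepath=None):
            fig, axis = self.setup_plot()
            self.df_switches()['__cpu'].plot(ax=axis, style='+')
            self.save_plot(fig, filepath)
            return axis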
#
-""" Tasks Analysis Module """
+from enum import Enum
-import matplotlib.gridspec as gridspec
-import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
-import pylab as pl
-import re
-from lisa.analysis.base import AnalysisBase
+from lisa.analysis.base import AnalysisBase, requires_events
from lisa.utils import memoized
-from trappy.utils import listify
+class StateInt(int):
+ """
+ A tweaked int for :class:`lisa.analysis.tasks.TaskState`
+ """
+ def __new__(cls, value, char="", doc=""):
+ new = super().__new__(cls, value)
+ new.char = char
+ new.__doc__ = doc
+ return new
+
+ def __or__(self, other):
+ # Combine the state chars, separating them with "|" only when
+ # both sides define one
+ char = "|".join(c for c in (self.char, other.char) if c)
+
+ return type(self)(
+ int(self) | int(other),
+ char=char)
+
+class TaskState(StateInt, Enum):
+ """
+ Represents the task state as visible in sched_switch
+
+ * Values are extracted from include/linux/sched.h
+ * Chars are extracted from fs/proc/array.c:get_task_state()
+ """
+ #pylint-suppress: bad-whitespace
+ TASK_RUNNING = 0x0000, "R", "Running"
+ TASK_INTERRUPTIBLE = 0x0001, "S", "Sleeping"
+ TASK_UNINTERRUPTIBLE = 0x0002, "D", "Disk sleep"
+ # __ has a special meaning in Python so let's not do that
+ TASK_STOPPED = 0x0004, "T", "Stopped"
+ TASK_TRACED = 0x0008, "t", "Tracing stop"
+
+ EXIT_DEAD = 0x0010, "X", "Dead"
+ EXIT_ZOMBIE = 0x0020, "Z", "Zombie"
+
+ # Apparently not visible in traces
+ # EXIT_TRACE = (EXIT_ZOMBIE[0] | EXIT_DEAD[0])
+
+ TASK_PARKED = 0x0040, "P", "Parked"
+ TASK_DEAD = 0x0080, "I", "Idle"
+ TASK_WAKEKILL = 0x0100
+ TASK_WAKING = 0x0200, "W", "Waking" # LISA-only char definition
+ TASK_NOLOAD = 0x0400
+ TASK_NEW = 0x0800
+ TASK_STATE_MAX = 0x1000
+
+ # LISA-only, used to differentiate runnable (R) vs running (A)
+ TASK_ACTIVE = 0x2000, "A", "Active"
+
+ @classmethod
+ def list_reported_states(cls):
+ """
+ List the states that can be reported in a ``sched_switch`` trace
+
+ See include/linux/sched.h:TASK_REPORT
+ """
+ return [state for state in list(cls) if state <= cls.TASK_DEAD]
+
+ # Could use IntFlag instead once we move to Python 3.6
+ @classmethod
+ def sched_switch_str(cls, value):
+ """
+ Get the task state string that would be used in a ``sched_switch`` event
+
+ :param value: The task state value
+ :type value: int
+
+ Tries to emulate what is done in include/trace/events:TRACE_EVENT(sched_switch)
+ """
+ if any([value & state.value for state in cls.list_reported_states()]):
+ res = "|".join([state.char for state in cls.list_reported_states()
+ if state.value & value])
+ else:
+ res = cls.TASK_RUNNING.char
+
+ # Flag the presence of unreportable states with a "+"
+ if any([value & state.value for state in list(cls)
+ if state not in cls.list_reported_states()]):
+ res += "+"
+
+ return res

class TasksAnalysis(AnalysisBase):
"""
@@ -42,72 +121,29 @@ class TasksAnalysis(AnalysisBase):
def __init__(self, trace):
super(TasksAnalysis, self).__init__(trace)
-
###############################################################################
# DataFrame Getter Methods
###############################################################################
- def df_top_big_tasks(self, min_samples=100, min_utilization=None):
+ @requires_events(['sched_wakeup'])
+ def df_tasks_wakeups(self):
"""
- Tasks which had 'utilization' samples bigger than the specified
- threshold
+ The number of wakeups per task
-
- :param min_samples: minumum number of samples over the min_utilization
- :type min_samples: int
+ :returns: a :class:`pandas.DataFrame` with:
-
- :param 
min_utilization: minimum utilization used to filter samples - default: capacity of a little cluster - :type min_utilization: int + * Task PIDs as index + * A ``wakeups`` column (The number of wakeups) """ - if self.df_load() is None: - self._log.warning('No trace events for task signals, plot DISABLED') - return None - - if min_utilization is None: - min_utilization = self._little_cap - - # Get utilization samples >= min_utilization - df = self.df_load() - big_tasks_events = df[df.util_avg > min_utilization] - if not len(big_tasks_events): - self._log.warning('No tasks with with utilization samples > %d', - min_utilization) - return None - - # Report the number of tasks which match the min_utilization condition - big_tasks = big_tasks_events.pid.unique() - self._log.info('%5d tasks with samples of utilization > %d', - len(big_tasks), min_utilization) - - # Compute number of samples above threshold - desc = big_tasks_events.groupby('pid').describe(include=['object']) - if isinstance(desc.index, pd.MultiIndex): - # We must be running on a pre-0.20.0 version of pandas. - # unstack will convert the old output format to the new. - # http://pandas.pydata.org/pandas-docs/version/0.20/whatsnew.html#groupby-describe-formatting - desc = desc.unstack() - big_tasks_stats = desc['comm'].sort_values(by=['count'], ascending=False) - - # Filter for number of occurrences - big_tasks_stats = big_tasks_stats[big_tasks_stats['count'] > min_samples] - if not len(big_tasks_stats): - self._log.warning(' but none with more than %d samples', - min_samples) - return None - - self._log.info(' %d with more than %d samples', - len(big_tasks_stats), min_samples) - - # Add task name column - big_tasks_stats['comm'] = big_tasks_stats.index.map( - lambda pid: self._trace.getTaskByPid(pid)) + df = self._trace.df_events('sched_wakeup') - # Filter columns of interest - big_tasks_stats = big_tasks_stats[['count', 'comm']] - big_tasks_stats.rename(columns={'count': 'samples'}, inplace=True) + wakeups = df.groupby('pid').count()["comm"] + df = pd.DataFrame(wakeups).rename(columns={"comm" : "wakeups"}) + df["comm"] = df.index.map(self._trace.get_task_by_pid) - return big_tasks_stats + return df + @requires_events(df_tasks_wakeups.required_events) def df_top_wakeup(self, min_wakeups=100): """ Tasks which wakeup more frequently than a specified threshold. 
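The counting idiom used by ``df_tasks_wakeups`` (group the wakeup events by PID, count a column, rename it, then map PIDs back to task names) can be reproduced on a toy frame to see the shape of the result; the data below is made up for illustration::

    import pandas as pd

    # Stand-in for a sched_wakeup dataframe: one row per wakeup event
    df = pd.DataFrame({
        'pid':  [1, 1, 2, 2, 2, 3],
        'comm': ['foo', 'foo', 'bar', 'bar', 'bar', 'baz'],
    })

    wakeups = df.groupby('pid').count()['comm']
    out = pd.DataFrame(wakeups).rename(columns={'comm': 'wakeups'})
    print(out)
    #      wakeups
    # pid
    # 1          2
    # 2          3
    # 3          1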
@@ -115,53 +151,33 @@ class TasksAnalysis(AnalysisBase): :param min_wakeups: minimum number of wakeups :type min_wakeups: int """ - if not self._trace.hasEvents('sched_wakeup'): - self._log.warning('Events [sched_wakeup] not found') - return None - - df = self._trace.df_events('sched_wakeup') + df = self.df_tasks_wakeups() - # Compute number of wakeups above threshold - wkp_tasks_stats = df.groupby('pid').describe(include=['object']) - wkp_tasks_stats = wkp_tasks_stats.unstack()['comm']\ - .sort_values(by=['count'], ascending=False) - - # Filter for number of occurrences - wkp_tasks_stats = wkp_tasks_stats[ - wkp_tasks_stats['count'] > min_wakeups] - if not len(df): - self._log.warning('No tasks with more than %d wakeups', - len(wkp_tasks_stats)) - return None - self._log.info('%5d tasks with more than %d wakeups', - len(df), len(wkp_tasks_stats)) - - # Add task name column - wkp_tasks_stats['comm'] = wkp_tasks_stats.index.map( - lambda pid: self._trace.getTaskByPid(pid)) - - # Filter columns of interest - wkp_tasks_stats = wkp_tasks_stats[['count', 'comm']] - wkp_tasks_stats.rename(columns={'count': 'samples'}, inplace=True) + # Compute number of samples above threshold + df = df[df.wakeups > min_wakeups] + df = df.sort_values(by="wakeups", ascending=False) - return wkp_tasks_stats + return df + @requires_events(['sched_switch']) def df_rt_tasks(self, min_prio=100): """ Tasks with RT priority - NOTE: priorities uses scheduler values, thus: the lower the value the - higher is the task priority. - RT Priorities: [ 0..100] - FAIR Priorities: [101..120] + .. note:: priorities uses scheduler values, thus: the lower the value the + higher is the task priority. + RT Priorities: [ 0..100] + FAIR Priorities: [101..120] - :param min_prio: minumum priority + :param min_prio: minimum priority :type min_prio: int - """ - if not self._trace.hasEvents('sched_switch'): - self._log.warning('Events [sched_switch] not found') - return None + :returns: a :class:`pandas.DataFrame` with: + + * Task PIDs as index + * A ``prio`` column (The priority of the task) + * A ``comm`` column (The name of the task) + """ df = self._trace.df_events('sched_switch') # Filters tasks which have a priority bigger than threshold @@ -169,635 +185,316 @@ class TasksAnalysis(AnalysisBase): # Filter columns of interest rt_tasks = df[['next_pid', 'next_prio']] - - # Remove all duplicateds rt_tasks = rt_tasks.drop_duplicates() # Order by priority - rt_tasks.sort_values(by=['next_prio', 'next_pid'], ascending=True, - inplace=True) - rt_tasks.rename(columns={'next_pid': 'pid', 'next_prio': 'prio'}, - inplace=True) + rt_tasks.sort_values( + by=['next_prio', 'next_pid'], ascending=True, inplace=True) + rt_tasks.rename( + columns={'next_pid': 'pid', 'next_prio': 'prio'}, inplace=True) - # Set PID as index rt_tasks.set_index('pid', inplace=True) - - # Add task name column - rt_tasks['comm'] = rt_tasks.index.map( - lambda pid: self._trace.getTaskByPid(pid)) + rt_tasks['comm'] = rt_tasks.index.map(self._trace.get_task_by_pid) return rt_tasks - def df_load(self): + @requires_events(['sched_switch', 'sched_wakeup']) + def df_task_states(self, task): """ - Get a DataFrame with the scheduler's per-task load-tracking signals + DataFrame of task's state updates events - Parse the relevant trace event and return a DataFrame with the - scheduler's load tracking update events for each task. + :param task: The task's name or PID + :type task: int or str - :returns: DataFrame with at least the following columns: - 'comm', 'pid', 'load_avg', 'util_avg'. 
+ :returns: a :class:`pandas.DataFrame` with: + + * A ``target_cpu`` column (the CPU where the task has been scheduled). + Will be ``NaN`` for non-wakeup events + * A ``curr_state`` column (the current task state, see :class:`~TaskState`) + * A ``next_state`` column (the next task state, see :class:`~TaskState`) + * A ``delta`` column (the duration for which the task will remain in + this state) """ - df = None + pid = self._trace.get_task_pid(task) - if 'sched_load_avg_task' in self._trace.available_events: - df = self._trace.df_events('sched_load_avg_task') + wk_df = self._trace.df_events('sched_wakeup') + sw_df = self._trace.df_events('sched_switch') - elif 'sched_load_se' in self._trace.available_events: - df = self._trace.df_events('sched_load_se') - df = df.rename(columns={'util': 'util_avg', 'load': 'load_avg'}) - # In sched_load_se, PID shows -1 for task groups. - df = df[df.pid != -1] + if "sched_wakeup_new" in self._trace.events: + wkn_df = self._trace.df_events('sched_wakeup_new') + wk_df = pd.concat([wk_df, wkn_df]).sort_index() - if not self._trace.has_big_little: - return df + task_wakeup = wk_df[wk_df.pid == pid][['target_cpu', '__cpu']] + task_wakeup['curr_state'] = TaskState.TASK_WAKING.char - df['cluster'] = np.select( - [df.cpu.isin(self._trace.plat_info['clusters']['little'])], - ['LITTLE'], 'big') + task_switches_df = sw_df[ + (sw_df.prev_pid == pid) | + (sw_df.next_pid == pid) + ][['__cpu', 'prev_pid', 'prev_state']] - if 'nrg-model' in self._trace.plat_info: - # Add a column which represents the max capacity of the smallest - # clustre which can accomodate the task utilization - little_cap = self._trace.plat_info['nrg-model']['little']['cpu']['cap_max'] - big_cap = self._trace.plat_info['nrg-model']['big']['cpu']['cap_max'] - df['min_cluster_cap'] = df.util_avg.map( - lambda util_avg: big_cap if util_avg > little_cap else little_cap - ) + def stringify_row_state(row): + if row.prev_pid != pid: + # This is a switch-in event + # (we don't care about the status of a task we are replacing) + return TaskState.TASK_ACTIVE.char - return df + return TaskState.sched_switch_str(row.prev_state) + + task_switches_df.prev_state = task_switches_df.apply( + stringify_row_state, axis=1) + + task_switches_df = task_switches_df.drop(columns=["prev_pid"]) + + task_switches_df.rename(columns={'prev_state' : 'curr_state'}, inplace=True) + + # Integer values are prefered here, otherwise the whole column + # is converted to float64 + task_switches_df['target_cpu'] = -1 + + task_state_df = task_wakeup.append(task_switches_df, sort=True).sort_index() + + task_state_df.rename(columns={'__cpu' : 'cpu'}, inplace=True) + task_state_df = task_state_df[['target_cpu', 'cpu', 'curr_state']] + task_state_df['next_state'] = task_state_df.curr_state.shift(-1) + self._trace.add_events_deltas(task_state_df, inplace=True) + + return task_state_df + + @requires_events(df_task_states.required_events) + def df_task_total_residency(self, task): + """ + DataFrame of a task's execution time on each CPU + + :param task: the task to report runtimes for + :type task: int or str + + :returns: a :class:`pandas.DataFrame` with: + + * CPU IDs as index + * A ``runtime`` column (the time the task spent being active) + """ + cpus = set(range(self._trace.plat_info['cpus-count'])) + + df = self.df_task_states(task) + df = df[df.curr_state == TaskState.TASK_ACTIVE.char] + + residency_df = pd.DataFrame(df.groupby("cpu")["delta"].sum()) + residency_df.rename(columns={"delta" : "runtime"}, inplace=True) + + cpus_present = 
set(residency_df.index.unique())
+
+ for cpu in cpus.difference(cpus_present):
+ residency_df.loc[cpu] = 0.
+
+ residency_df.sort_index(inplace=True)
+
+ return residency_df

###############################################################################
# Plotting Methods
###############################################################################

- def plot_tasks(self, tasks, signals=None):
+ @requires_events(['sched_switch'])
+ def plot_task_residency(self, task, filepath=None):
"""
- Generate a common set of useful plots for each of the specified tasks
-
- This method allows to filter which signals should be plot, if data are
- available in the input trace. The list of signals supported are:
- Tasks signals plot:
- load_avg, util_avg, boosted_util, sched_overutilized
- Tasks residencies on CPUs:
- residencies, sched_overutilized
- Tasks PELT signals:
- load_sum, util_sum, period_contrib, sched_overutilized
-
- At least one of the previous signals must be specified to get a valid
- plot.
-
- Addidional custom signals can be specified and they will be represented
- in the "Task signals plots" if they represent valid keys of the task
- load/utilization trace event (e.g. sched_load_avg_task).
-
- Note:
- sched_overutilized: enable the plotting of overutilization bands on
- top of each subplot
- residencies: enable the generation of the CPUs residencies plot
-
- :param tasks: the list of task names and/or PIDs to plot.
- Numerical PIDs and string task names can be mixed
- in the same list.
- :type tasks: list(str) or list(int)
-
- :param signals: list of signals (and thus plots) to generate
- default: all the plots and signals available in the
- current trace
- :type signals: list(str)
+ Plot which CPUs the task ran on over time
+
+ :param task: The task's name or PID
+ :type task: int or str
"""
- if not signals:
- signals = ['load_avg', 'util_avg', 'boosted_util',
- 'sched_overutilized',
- 'load_sum', 'util_sum', 'period_contrib',
- 'residencies']
-
- # Check for the minimum required signals to be available
- if self.df_load() is None:
- self._log.warning('No trace events for task signals, plot DISABLED')
- return
-
- # Defined list of tasks to plot
- if tasks and \
- not isinstance(tasks, str) and \
- not isinstance(tasks, list):
- raise ValueError('Wrong format for tasks parameter')
-
- if tasks:
- tasks_to_plot = listify(tasks)
- else:
- raise ValueError('No tasks to plot specified')
-
- # Compute number of plots to produce
- plots_count = 0
- plots_signals = [
- # Fist plot: task's utilization
- {'load_avg', 'util_avg', 'boosted_util'},
- # Second plot: task residency
- {'residencies'},
- # Third plot: tasks's load
- {'load_sum', 'util_sum', 'period_contrib'}
- ]
- hr = []
- ysize = 0
- for plot_id, signals_to_plot in enumerate(plots_signals):
- signals_to_plot = signals_to_plot.intersection(signals)
- if len(signals_to_plot):
- plots_count = plots_count + 1
- # Use bigger size only for the first plot
- hr.append(3 if plot_id == 0 else 1)
- ysize = ysize + (8 if plot_id else 4)
-
- # Grid
- gs = gridspec.GridSpec(plots_count, 1, height_ratios=hr)
- gs.update(wspace=0.1, hspace=0.1)
-
- # Build list of all PIDs for each task_name to plot
- pids_to_plot = []
- for task in tasks_to_plot:
- # Add specified PIDs to the list
- if isinstance(task, int):
- pids_to_plot.append(task)
- continue
- # Otherwise: add all the PIDs for task with the specified name
- pids_to_plot.extend(self._trace.getTaskByName(task))
-
- for tid in pids_to_plot:
- savefig = False
-
- task_name = self._trace.getTaskByPid(tid)
- self._log.info('Plotting [%d:%s]...', tid, task_name)
- plot_id = 0
-
- # For each task create a figure with plots_count plots
- plt.figure(figsize=(16, ysize))
- plt.suptitle('Task Signals',
- y=.94, fontsize=16, horizontalalignment='center')
-
- # Plot load and utilization
- signals_to_plot = {'load_avg', 'util_avg', 'boosted_util'}
- signals_to_plot = list(signals_to_plot.intersection(signals))
- if len(signals_to_plot) > 0:
- axes = plt.subplot(gs[plot_id, 0])
- axes.set_title('Task [{0:d}:{1:s}] Signals'
- .format(tid, task_name))
- plot_id = plot_id + 1
- is_last = (plot_id == plots_count)
- self._plot_task_signals(axes, tid, signals, is_last)
- savefig = True
-
- # Plot CPUs residency
- signals_to_plot = {'residencies'}
- signals_to_plot = list(signals_to_plot.intersection(signals))
- if len(signals_to_plot) > 0:
- if not self._trace.has_big_little:
- self._log.warning(
- 'No big.LITTLE platform data, residencies plot disabled')
+ fig, axis = self.setup_plot()
+
+ pid = self._trace.get_task_pid(task)
+
+ sw_df = self._trace.df_events("sched_switch")
+ sw_df = sw_df[sw_df.next_pid == pid]
+
+ if "freq-domains" in self._trace.plat_info:
+ # If we are aware of frequency domains, use one color per domain
+ for domain in self._trace.plat_info["freq-domains"]:
+ df = sw_df[sw_df["__cpu"].isin(domain)]["__cpu"]
+
+ if df.empty:
+ # Cycle the colours to stay consistent
+ self.cycle_colors(axis, 1)
+ else:
- axes = plt.subplot(gs[plot_id, 0])
- axes.set_title(
- 'Task [{0:d}:{1:s}] Residency (green: LITTLE, red: big)'
- .format(tid, task_name)
- )
- plot_id = plot_id + 1
- is_last = (plot_id == plots_count)
- if 'sched_overutilized' in signals:
- signals_to_plot.append('sched_overutilized')
- self._plot_task_residencies(axes, tid, signals_to_plot, is_last)
- savefig = True
-
- # Plot PELT signals
- signals_to_plot = {'load_sum', 'util_sum', 'period_contrib'}
- signals_to_plot = list(signals_to_plot.intersection(signals))
- if len(signals_to_plot) > 0:
- axes = plt.subplot(gs[plot_id, 0])
- axes.set_title('Task [{0:d}:{1:s}] PELT Signals'
- .format(tid, task_name))
- plot_id = plot_id + 1
- if 'sched_overutilized' in signals:
- signals_to_plot.append('sched_overutilized')
- self._plot_task_pelt(axes, tid, signals_to_plot)
- savefig = True
-
- if not savefig:
- self._log.warning('Nothing to plot for %s', task_name)
- continue
-
- # Save generated plots into datadir
- if isinstance(task_name, list):
- task_name = re.sub('[:/]', '_', task_name[0])
- else:
- task_name = re.sub('[:/]', '_', task_name)
- figname = '{}/{}task_util_{}_{}.png'\
- .format(self._trace.plots_dir, self._trace.plots_prefix,
- tid, task_name)
- pl.savefig(figname, bbox_inches='tight')
-
- def plot_big_tasks(self, max_tasks=10, min_samples=100,
- min_utilization=None):
- """
- For each big task plot utilization and show the smallest cluster
- capacity suitable for accommodating task utilization.
+ df.plot(ax=axis, style='+',
+ label="Task running in domain {}".format(domain))
+ else:
+ sw_df["__cpu"].plot(ax=axis, style='+')
+
+ plot_overutilized = self._trace.analysis.status.plot_overutilized
+ if self._trace.hasEvents(plot_overutilized.required_events):
+ plot_overutilized(axis=axis)
-
- :param max_tasks: maximum number of tasks to consider
- :type max_tasks: int
+ # Add an extra CPU lane to make room for the legend
+ axis.set_ylim(-0.95, self._trace.cpus_count - 0.05)
-
- :param min_samples: minumum number of samples over the min_utilization
- :type min_samples: int
+ axis.set_title("CPU residency of task \"{}\"".format(task))
+ axis.set_ylabel('CPUs')
+ axis.grid(True)
+ axis.legend()
+ axis.set_xlim(self._trace.x_min, self._trace.x_max)
-
- :param min_utilization: minimum utilization used to filter samples
- default: capacity of a little cluster
- :type min_utilization: int
+ self.save_plot(fig, filepath)
+
+ return axis
+
+ @requires_events(df_task_total_residency.required_events)
+ def plot_task_total_residency(self, task, filepath=None):
"""
+ Plot a task's total time spent on each CPU
-
- # Get PID of big tasks
- big_frequent_task_df = self.df_top_big_tasks(
- min_samples, min_utilization)
- if big_frequent_task_df is None:
- # (Logged already)
- return
-
- if max_tasks > 0:
- big_frequent_task_df = big_frequent_task_df.head(max_tasks)
- big_frequent_task_pids = big_frequent_task_df.index.values
-
- big_frequent_tasks_count = len(big_frequent_task_pids)
- if big_frequent_tasks_count == 0:
- self._log.warning('No big/frequent tasks to plot')
- return
-
- # Get the list of events for all big frequent tasks
- df = self.df_load()
- big_frequent_tasks_events = df[df.pid.isin(big_frequent_task_pids)]
-
- # Define axes for side-by-side plottings
- fig, axes = plt.subplots(big_frequent_tasks_count, 1,
- figsize=(16, big_frequent_tasks_count*4))
- plt.subplots_adjust(wspace=0.1, hspace=0.2)
-
- plot_idx = 0
- for pid, group in big_frequent_tasks_events.groupby('pid'):
-
- # # Build task names (there could be multiple, during the task lifetime)
- task_name = 'Task [%d:%s]'.format(pid, self._trace.getTaskByPid(pid))
-
- # Plot title
- if big_frequent_tasks_count == 1:
- ax = axes
- else:
- ax = axes[plot_idx]
- ax.set_title(task_name)
-
- # Left axis: utilization
- ax = group.plot(y=['util_avg', 'min_cluster_cap'],
- style=['r.', '-b'],
- drawstyle='steps-post',
- linewidth=1,
- ax=ax)
- ax.set_xlim(self._trace.x_min, self._trace.x_max)
- ax.set_ylim(0, 1100)
- ax.set_ylabel('util_avg')
- ax.set_xlabel('')
- ax.grid(True)
- self._trace.analysis.status.plot_overutilized(ax)
-
- plot_idx += 1
-
- ax.set_xlabel('Time [s]')
-
- self._log.info('Tasks which have been a "utilization" of %d for at least %d samples',
- self._little_cap, min_samples)
-
- def plot_wakeup(self, max_tasks=10, min_wakeups=0, per_cluster=False):
+ :param task: The task's name or PID
+ :type task: str or int
"""
- Show waking up tasks over time and newly forked tasks in two separate
- plots. 
+ fig, axis = self.setup_plot(height=8) - :param max_tasks: maximum number of tasks to consider - :param max_tasks: int + df = self.df_task_total_residency(task) - :param min_wakeups: minimum number of wakeups of each task - :type min_wakeups: int + df["runtime"].plot.bar(ax=axis) + axis.set_title("CPU residency of task \"{}\"".format(task)) + axis.set_xlabel("CPU") + axis.set_ylabel("Runtime (s)") + axis.grid(True) - :param per_cluster: if True get per-cluster wakeup events - :type per_cluster: bool + self.save_plot(fig, filepath) + + return axis + + def _df_discretize_series(self, series, time_delta, name): """ - if per_cluster is True and \ - not self._trace.hasEvents('sched_wakeup_new'): - self._log.warning('Events [sched_wakeup_new] not found, ' - 'plots DISABLED!') - return - elif not self._trace.hasEvents('sched_wakeup') and \ - not self._trace.hasEvents('sched_wakeup_new'): - self._log.warning('Events [sched_wakeup, sched_wakeup_new] not found, ' - 'plots DISABLED!') - return - - # Define axes for side-by-side plottings - fig, axes = plt.subplots(2, 1, figsize=(14, 5)) - plt.subplots_adjust(wspace=0.2, hspace=0.3) - - if per_cluster: - - # Get per cluster wakeup events - df = self._trace.df_events('sched_wakeup_new') - big_frequent = df.target_cpu.isin(self._big_cpus) - ntbc = df[big_frequent] - ntbc_count = len(ntbc) - little_frequent = df.target_cpu.isin(self._little_cpus) - ntlc = df[little_frequent]; - ntlc_count = len(ntlc) - - self._log.info('%5d tasks forked on big cluster (%3.1f %%)', - ntbc_count, - 100. * ntbc_count / (ntbc_count + ntlc_count)) - self._log.info('%5d tasks forked on LITTLE cluster (%3.1f %%)', - ntlc_count, - 100. * ntlc_count / (ntbc_count + ntlc_count)) - - ax = axes[0] - ax.set_title('Tasks Forks on big CPUs'); - ntbc.pid.plot(style=['g.'], ax=ax); - ax.set_xlim(self._trace.x_min, self._trace.x_max); - ax.set_xticklabels([]) - ax.set_xlabel('') - ax.grid(True) - self._trace.analysis.status.plot_overutilized(ax) - - ax = axes[1] - ax.set_title('Tasks Forks on LITTLE CPUs'); - ntlc.pid.plot(style=['g.'], ax=ax); - ax.set_xlim(self._trace.x_min, self._trace.x_max); - ax.grid(True) - self._trace.analysis.status.plot_overutilized(ax) - - return - - # Keep events of defined big tasks - wkp_task_pids = self.df_top_wakeup(min_wakeups) - if len(wkp_task_pids): - wkp_task_pids = wkp_task_pids.index.values[:max_tasks] - self._log.info('Plotting %d frequent wakeup tasks', - len(wkp_task_pids)) - - ax = axes[0] - ax.set_title('Tasks WakeUps Events') - df = self._trace.df_events('sched_wakeup') - if len(df): - df = df[df.pid.isin(wkp_task_pids)] - df.pid.astype(int).plot(style=['b.'], ax=ax) - ax.set_xlim(self._trace.x_min, self._trace.x_max) - ax.set_xticklabels([]) - ax.set_xlabel('') - ax.grid(True) - self._trace.analysis.status.plot_overutilized(ax) - - ax = axes[1] - ax.set_title('Tasks Forks Events') - df = self._trace.df_events('sched_wakeup_new') - if len(df): - df = df[df.pid.isin(wkp_task_pids)] - df.pid.astype(int).plot(style=['r.'], ax=ax) - ax.set_xlim(self._trace.x_min, self._trace.x_max) - ax.grid(True) - self._trace.analysis.status.plot_overutilized(ax) - - def plot_big_tasks_vs_capacity(self, min_samples=1, - min_utilization=None, big_cluster=True): + Discrete the contents of ``series`` in ``time_delta`` buckets """ - Draw a plot that shows whether tasks are placed on the correct cluster - based on their utilization and cluster capacity. 
Green dots mean the - task was placed on the correct cluster, Red means placement was wrong + left = self._trace.x_min + data = [] + index = [] + for right in np.arange(left + time_delta, self._trace.x_max, time_delta): + index.append(left) + data.append(series[left:right].count()) + left = right - :param min_samples: minumum number of samples over the min_utilization - :type min_samples: int + return pd.DataFrame(data=data, index=index, columns=[name]) - :param min_utilization: minimum utilization used to filter samples - default: capacity of a little cluster - :type min_utilization: int + def _plot_cpu_heatmap(self, x, y, xbins, colorbar_label, **kwargs): + """ + Plot some data in a heatmap-style 2d histogram + """ + nr_cpus = self._trace.cpus_count + fig, axis = self.setup_plot(height=min(4, nr_cpus // 2), width=20) + + _, _, _, img = axis.hist2d(x, y, bins=[xbins, nr_cpus], **kwargs) + fig.colorbar(img, label=colorbar_label) - :param big_cluster: - :type big_cluster: bool + return fig, axis + + @requires_events(["sched_wakeup"]) + def plot_tasks_wakeups(self, target_cpus=None, time_delta=0.01, filepath=None): """ + Plot task wakeups over time - if not self._trace.hasEvents('cpu_frequency'): - self._log.warning('Events [cpu_frequency] not found') - return + :param target_cpus: + :type target_cpus: - # Get all utilization update events - df = self.df_load() - if df is None: - self._log.warning('No trace events for task signals, plot DISABLED') - return + :param time_delta: The discretization delta for summing up wakeups in a + given time delta. + :type time_delta: float + """ + fig, axis = self.setup_plot() - if big_cluster: - cluster_correct = 'big' - cpus = self._big_cpus - else: - cluster_correct = 'LITTLE' - cpus = self._little_cpus - - # Keep events of defined big tasks - big_task_pids = self.df_top_big_tasks( - min_samples, min_utilization) - if big_task_pids is not None: - big_task_pids = big_task_pids.index.values - df = df[df.pid.isin(big_task_pids)] - if not df.size: - self._log.warning('No events for tasks with more then %d utilization ' - 'samples bigger than %d, plots DISABLED!') - return - - fig, axes = plt.subplots(2, 1, figsize=(14, 5)) - plt.subplots_adjust(wspace=0.2, hspace=0.3) - - # Add column of expected cluster depending on: - # a) task utilization value - # b) capacity of the selected cluster - bu_bc = ((df['util_avg'] > self._little_cap) & - (df['cpu'].isin(self._big_cpus))) - su_lc = ((df['util_avg'] <= self._little_cap) & - (df['cpu'].isin(self._little_cpus))) - - # The Cluster CAPacity Matches the UTILization (ccap_mutil) iff: - # - tasks with util_avg > little_cap are running on a BIG cpu - # - tasks with util_avg <= little_cap are running on a LITTLe cpu - df.loc[:,'ccap_mutil'] = np.select([(bu_bc | su_lc)], [True], False) - - df_freq = self._trace.df_events('cpu_frequency') - df_freq = df_freq[df_freq.cpu == cpus[0]] - - ax = axes[0] - ax.set_title('Tasks Utilization vs Allocation') - for ucolor, umatch in zip('gr', [True, False]): - cdata = df[df['ccap_mutil'] == umatch] - if len(cdata) > 0: - cdata['util_avg'].plot(ax=ax, - style=[ucolor+'.'], legend=False) - ax.set_xlim(self._trace.x_min, self._trace.x_max) - ax.set_xticklabels([]) - ax.set_xlabel('') - ax.grid(True) - self._trace.analysis.status.plot_overutilized(ax) - - ax = axes[1] - ax.set_title('Frequencies on "{}" cluster'.format(cluster_correct)) - df_freq['frequency'].plot(style=['-b'], ax=ax, drawstyle='steps-post') - ax.set_xlim(self._trace.x_min, self._trace.x_max); - ax.grid(True) - 
self._trace.analysis.status.plot_overutilized(ax) - - legend_y = axes[0].get_ylim()[1] - axes[0].annotate('Utilization-Capacity Matches', - xy=(0, legend_y), - xytext=(-50, 45), textcoords='offset points', - fontsize=18) - axes[0].annotate('Task schduled (green) or not (red) on min cluster', - xy=(0, legend_y), - xytext=(-50, 25), textcoords='offset points', - fontsize=14) + df = self._trace.df_events("sched_wakeup") + if target_cpus: + df = df[df.target_cpu.isin(target_cpus)] -############################################################################### -# Utility Methods -############################################################################### + df = self._df_discretize_series(df["target_cpu"], time_delta, "Wakeup count") + df.plot(ax=axis, legend=False) + + axis.set_title("Number of task wakeups within {}s windows".format(time_delta)) + axis.set_xlim(self._trace.x_min, self._trace.x_max) + + self.save_plot(fig, filepath) + + return axis + + @requires_events(["sched_wakeup"]) + def plot_tasks_wakeups_heatmap(self, xbins=100, colormap=None, filepath=None): + """ + :param xbins: Number of x-axis bins, i.e. in how many slices should + time be arranged + :type xbins: int - def _plot_task_signals(self, axes, tid, signals, is_last=False): + :param colormap: The name of a colormap (see + https://matplotlib.org/users/colormaps.html), or a Colormap object + :type colormap: str or matplotlib.colors.Colormap """ - For task with ID `tid` plot the specified signals. - :param axes: axes over which to generate the plot - :type axes: :mod:`matplotlib.axes.Axes` + df = self._trace.df_events("sched_wakeup") + + fig, axis = self._plot_cpu_heatmap( + df.index, df.target_cpu, xbins, "Number of wakeups", cmap=colormap) + + axis.set_title("Tasks wakeups over time") + axis.set_xlim(self._trace.x_min, self._trace.x_max) - :param tid: task ID - :type tid: int + self.save_plot(fig, filepath) - :param signals: signals to be plot - :param signals: list(str) + return axis - :param is_last: if True this is the last plot - :type is_last: bool + @requires_events(["sched_wakeup_new"]) + def plot_tasks_forks(self, target_cpus=None, time_delta=0.01, filepath=None): """ - # Get dataframe for the required task - util_df = self.df_load() - if util_df is None: - self._log.warning('No trace events for task signals, plot DISABLED') - return - - # Plot load and util - signals_to_plot = set(signals).difference({'boosted_util'}) - for signal in signals_to_plot: - if signal not in util_df.columns: - continue - data = util_df[util_df.pid == tid][signal] - data.plot(ax=axes, drawstyle='steps-post', legend=True) - - # Plot boost utilization if available - if 'boosted_util' in signals and \ - self._trace.hasEvents('sched_boost_task'): - boost_df = self._trace.df_events('sched_boost_task') - data = boost_df[boost_df.pid == tid][['boosted_util']] - if len(data): - data.plot(ax=axes, style=['y-'], drawstyle='steps-post') - else: - task_name = self._trace.getTaskByPid(tid) - self._log.warning('No "boosted_util" data for task [%d:%s]', - tid, task_name) - - # Add Capacities data if avilable - if 'nrg-model' in self._trace.plat_info: - nrg_model = self._trace.plat_info['nrg-model'] - max_lcap = nrg_model['little']['cpu']['cap_max'] - max_bcap = nrg_model['big']['cpu']['cap_max'] - tip_lcap = 0.8 * max_lcap - tip_bcap = 0.8 * max_bcap - self._log.debug( - 'LITTLE capacity tip/max: %d/%d, big capacity tip/max: %d/%d', - tip_lcap, max_lcap, tip_bcap, max_bcap - ) - axes.axhline(tip_lcap, color='y', linestyle=':', linewidth=2) - 
axes.axhline(max_lcap, color='y', linestyle='--', linewidth=2) - axes.axhline(tip_bcap, color='r', linestyle=':', linewidth=2) - axes.axhline(max_bcap, color='r', linestyle='--', linewidth=2) - - axes.set_ylim(0, 1100) - axes.set_xlim(self._trace.x_min, self._trace.x_max) - axes.grid(True) - if not is_last: - axes.set_xticklabels([]) - axes.set_xlabel('') - if 'sched_overutilized' in signals: - self._trace.analysis.status.plot_overutilized(axes) - - def _plot_task_residencies(self, axes, tid, signals, is_last=False): + Plot task forks over time + + :param target_cpus: + :type target_cpus: + + :param time_delta: The discretization delta for summing up forks in a + given time delta. + :type time_delta: float """ - For task with ID `tid` plot residency information. + fig, axis = self.setup_plot() + + df = self._trace.df_events("sched_wakeup_new") - :param axes: axes over which to generate the plot - :type axes: :mod:`matplotlib.axes.Axes` + if target_cpus: + df = df[df.target_cpu.isin(target_cpus)] - :param tid: task ID - :type tid: int + df = self._df_discretize_series(df["target_cpu"], time_delta, "Forks count") + df.plot(ax=axis, legend=False) - :param signals: signals to be plot - :param signals: list(str) + axis.set_title("Number of task forks within {}s windows".format(time_delta)) + axis.set_xlim(self._trace.x_min, self._trace.x_max) - :param is_last: if True this is the last plot - :type is_last: bool + self.save_plot(fig, filepath) + + return axis + + @requires_events(["sched_wakeup_new"]) + def plot_tasks_forks_heatmap(self, xbins=100, colormap=None, filepath=None): """ - util_df = self.df_load() - if util_df is None: - self._log.warning('No trace events for task signals, plot DISABLED') - return - data = util_df[util_df.pid == tid][['cluster', 'cpu']] - for ccolor, clabel in zip('gr', ['LITTLE', 'big']): - cdata = data[data.cluster == clabel] - if len(cdata) > 0: - cdata.plot(ax=axes, style=[ccolor+'+'], legend=False) - # Y Axis - placeholders for legend, acutal CPUs. topmost empty lane - cpus = [str(n) for n in range(self._trace.plat_info['cpus-count'])] - ylabels = [''] + cpus - axes.set_yticklabels(ylabels) - axes.set_ylim(-1, len(cpus)) - axes.set_ylabel('CPUs') - # X Axis - axes.set_xlim(self._trace.x_min, self._trace.x_max) - - axes.grid(True) - if not is_last: - axes.set_xticklabels([]) - axes.set_xlabel('') - if 'sched_overutilized' in signals: - self._trace.analysis.status.plot_overutilized(axes) - - def _plot_task_pelt(self, axes, tid, signals): + :param xbins: Number of x-axis bins, i.e. in how many slices should + time be arranged + :type xbins: int + + :param colormap: The name of a colormap (see + https://matplotlib.org/users/colormaps.html), or a Colormap object + :type colormap: str or matplotlib.colors.Colormap """ - For task with ID `tid` plot PELT-related signals. 
-        :param axes: axes over which to generate the plot
-        :type axes: :mod:`matplotlib.axes.Axes`
+        df = self._trace.df_events("sched_wakeup_new")

-        :param tid: task ID
-        :type tid: int
+        fig, axis = self._plot_cpu_heatmap(
+            df.index, df.target_cpu, xbins, "Number of forks", cmap=colormap)

-        :param signals: signals to be plot
-        :param signals: list(str)
-        """
-        if not self._trace.hasEvents('sched_load_avg_task'):
-            self._log.warning(
-                'No sched_load_avg_task events, skipping PELT plot')
-            return
-
-        util_df = self._trace.df_events('sched_load_avg_task')
-        data = util_df[util_df.pid == tid][['load_sum',
-                                            'util_sum',
-                                            'period_contrib']]
-        data.plot(ax=axes, drawstyle='steps-post')
-        axes.set_xlim(self._trace.x_min, self._trace.x_max)
-        axes.ticklabel_format(style='scientific', scilimits=(0, 0),
-                              axis='y', useOffset=False)
-        axes.grid(True)
-        if 'sched_overutilized' in signals:
-            self._trace.analysis.status.plot_overutilized(axes)
+        axis.set_title("Tasks forks over time")
+        axis.set_xlim(self._trace.x_min, self._trace.x_max)
+
+        self.save_plot(fig, filepath)
+
+        return axis

# vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80
diff --git a/lisa/analysis/thermal.py b/lisa/analysis/thermal.py
index f2b6ff45c3e8a57c2b7a5bc8ceea5f1616fd4f41..011e5e0adfa674a2250953a9f2f2a1b0f2d0eff9 100644
--- a/lisa/analysis/thermal.py
+++ b/lisa/analysis/thermal.py
@@ -15,22 +15,12 @@
 # limitations under the License.
 #

-""" Thermal Analysis Module """
-
-import matplotlib.gridspec as gridspec
-import matplotlib.pyplot as plt
-import pandas as pd
-import pylab as pl
-import operator
-import os
+from matplotlib.ticker import MaxNLocator

-from trappy.utils import listify
 from devlib.utils.misc import list_to_mask, mask_to_list

-from lisa.analysis.base import AnalysisBase, ResidencyTime, ResidencyData
+from lisa.analysis.base import AnalysisBase, requires_events
 from lisa.utils import memoized
-from bart.common.Utils import area_under_curve
-from matplotlib.ticker import MaxNLocator


 class ThermalAnalysis(AnalysisBase):
@@ -38,245 +28,203 @@ class ThermalAnalysis(AnalysisBase):
     Support for plotting Thermal Analysis data

     :param trace: input Trace object
-    :type trace: lisa.Trace
+    :type trace: :class:`trace.Trace`
     """

     name = 'thermal'

-###############################################################################
-# Analysis properties
-###############################################################################
+    @requires_events(["thermal_temperature"])
+    def df_thermal_zones_temperature(self):
+        """
+        Get the temperature of the thermal zones
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * An ``id`` column (The thermal zone ID)
+          * A ``thermal_zone`` column (The thermal zone name)
+          * A ``temp`` column (The reported temperature)
+        """
+        df = self._trace.df_events("thermal")
+        df = df[['id', 'thermal_zone', 'temp']]
+
+        return df
+
+    @requires_events(["thermal_power_cpu_limit"])
+    def df_cpufreq_cooling_state(self, cpus=None):
+        """
+        Get cpufreq cooling device states
+
+        :param cpus: The CPUs to consider (all by default)
+        :type cpus: list(int)
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * A ``cpus`` column (The CPUs affected by the cooling device)
+          * A ``freq`` column (The frequency limit)
+          * A ``cdev_state`` column (The cooling device state index)
+
+        """
+        df = self._trace.df_events("cpu_out_power")
+        df = df[['cpus', 'freq', 'cdev_state']]
+
+        if cpus is not None:
+            # Find masks that match the requested CPUs
+            # This can include other CPUs
+            masks = self._matching_masks(cpus)
+            df = df[df.cpus.isin(masks)]
+
+        return df
+
+    @requires_events(["thermal_power_devfreq_limit"])
+    def df_devfreq_cooling_state(self, devices=None):
+        """
+        Get devfreq cooling device states
+
+        :param devices: The devfreq devices to consider (all by default)
+        :type devices: list(str)
+
+        :returns: a :class:`pandas.DataFrame` with:
+
+          * A ``type`` column (The devfreq device name)
+          * A ``freq`` column (The frequency limit)
+          * A ``cdev_state`` column (The cooling device state index)
+        """
+        df = self._trace.df_events("devfreq_out_power")
+        df = df[['type', 'freq', 'cdev_state']]
+
+        if devices is not None:
+            df = df[df.type.isin(devices)]
+
+        return df

     @property
     @memoized
+    @requires_events(df_thermal_zones_temperature.required_events)
     def thermal_zones(self):
         """
         Get thermal zone ids that appear in the trace
         """
-        df = self._trace.df_events('thermal_temperature')
+        df = self.df_thermal_zones_temperature()
         return df["thermal_zone"].unique().tolist()

     @property
     @memoized
+    @requires_events(df_cpufreq_cooling_state.required_events)
     def cpufreq_cdevs(self):
         """
         Get cpufreq cooling devices that appear in the trace
         """
-        df = self._trace.df_events('thermal_power_cpu_limit')
+        df = self.df_cpufreq_cooling_state()
         res = df['cpus'].unique().tolist()
         return [mask_to_list(mask) for mask in res]

     @property
     @memoized
+    @requires_events(df_devfreq_cooling_state.required_events)
     def devfreq_cdevs(self):
         """
         Get devfreq cooling devices that appear in the trace
         """
-        df = self._trace.df_events('thermal_power_devfreq_limit')
+        df = self.df_devfreq_cooling_state()
         return df['type'].unique().tolist()

 ###############################################################################
-# DataFrame Getter Methods
+# Plotting Methods
 ###############################################################################

-    def df_thermal_zone_temperature(self, ids=None):
+    @requires_events(df_thermal_zones_temperature.required_events)
+    def plot_thermal_zone_temperature(self, thermal_zone_id, filepath=None, axis=None):
         """
-        Get the temperature readings of one or more thermal zone(s)
-        (all by default)
+        Plot the temperature evolution of a given thermal zone

-        :param ids: The thermal zones to consider
-        :type ids: list(int)
+        :param thermal_zone_id: ID of the zone
+        :type thermal_zone_id: int
         """
-        df = self._trace.df_events('thermal_temperature')
-        df = df[['id', 'thermal_zone', 'temp']]
+        local_fig = axis is None

-        if ids is not None:
-            df = df[df.id.isin(ids)]
+        if local_fig:
+            fig, axis = self.setup_plot()

-        return df
+        df = self.df_thermal_zones_temperature()
+        df = df[df.id == thermal_zone_id]

-    def df_cpufreq_cooling_state(self, cpus=None):
-        """
-        Get the cooling states of one or more cpufreq cooling device(s)
-        (all by default)
+        tz_name = df.thermal_zone.unique()[0]

-        :param cpus: The CPUs to consider
-        :type cpus: list(int)
-        """
-        df = self._trace.df_events('thermal_power_cpu_limit')
-        df = df[['cpus', 'freq', 'cdev_state']]
+        df.temp.plot(drawstyle="steps-post", ax=axis,
+                     label="Thermal zone \"{}\"".format(tz_name))

-        if cpus is not None:
-            # Find masks that match the requested CPUs
-            # This can include other CPUs
-            masks = self._matching_masks(cpus)
-            df = df[df.cpus.isin(masks)]
+        axis.legend()

-        return df
+        if local_fig:
+            axis.grid(True)
+            axis.set_title("Temperature evolution")
+            axis.set_ylabel("Temperature (m°C)")
+            axis.set_xlim(self._trace.x_min, self._trace.x_max)
+            self.save_plot(fig, filepath)

-    def df_devfreq_cooling_state(self, devices=None):
+        return axis
+
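The dataframe getters above feed the plotting methods and re-export their event requirements through ``required_events``. A rough sketch of direct usage (``trace`` is assumed to be a parsed :class:`lisa.trace.Trace` whose trace contains the thermal events; the zone ID is illustrative)::

    thermal = trace.analysis.thermal

    # Zone names seen in the trace, via the memoized property
    print(thermal.thermal_zones)

    # Temperature samples of a single zone
    temp_df = thermal.df_thermal_zones_temperature()
    zone_df = temp_df[temp_df.id == 0]

    # Cooling device states covering CPU 0; returned rows may also cover
    # other CPUs sharing the same cooling device mask
    cdev_df = thermal.df_cpufreq_cooling_state(cpus=[0])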
+    @requires_events(df_cpufreq_cooling_state.required_events)
+    def plot_cpu_cooling_states(self, cpu, filepath=None, axis=None):
         """
-        Get the cooling states of one or more devfreq cooling device(s)
-        (all by default)
+        Plot the state evolution of a cpufreq cooling device

-        :param devices: The devfreq devices to consider
-        :type device: list(str)
+        :param cpu: The CPU to consider. Whole clusters can be controlled as
+          a single cooling device; they will be plotted as long as this CPU
+          belongs to the cluster.
+        :type cpu: int
         """
-        df = self._trace.df_events('thermal_power_devfreq_limit')
-        df = df[['type', 'freq', 'cdev_state']]
+        local_fig = axis is None

-        if devices is not None:
-            df = df[df.type.isin(devices)]
+        if local_fig:
+            fig, axis = self.setup_plot()

-        return df
+        df = self.df_cpufreq_cooling_state([cpu])
+        cdev_name = "CPUs {}".format(mask_to_list(df.cpus.unique()[0]))

+        df.cdev_state.plot(drawstyle="steps-post", ax=axis,
+                           label="\"{}\"".format(cdev_name))

-###############################################################################
-# Plotting Methods
-###############################################################################
+        axis.legend()

-    def plot_temperature(self, thermal_zones=None, ax=None):
-        """
-        Plot temperature of thermal zones (all by default)
+        if local_fig:
+            axis.grid(True)
+            axis.set_title("cpufreq cooling devices status")
+            axis.yaxis.set_major_locator(MaxNLocator(integer=True))
+            axis.grid(axis='y')
+            axis.set_xlim(self._trace.x_min, self._trace.x_max)
+            self.save_plot(fig, filepath)

-        Requires the following trace event:
-            - thermal_temperature
+        return axis

-        :param thermal_zones: ID(s) of the zones to be plotted.
-                              All the zones are plotted by default.
-                              IDs can be found in syfs: /sys/class/thermal/thermal_zone
-        :type thermal_zones: list(int)
-        """
-        if not self._trace.hasEvents('thermal_temperature'):
-            self._log.warning('Event [{}] not found, plot DISABLED!'
-                              .format('thermal_temperature'))
-            return
-
-        plot_df = self.df_thermal_zone_temperature(thermal_zones)
-
-        def stringify_tz(id):
-            return plot_df[plot_df.id == id]['thermal_zone'].unique()[0]
-
-        filters = None if thermal_zones is None else {'thermal_zone' : thermal_zones}
-        self._plot_generic(plot_df, 'id', filters=filters, columns=['temp'],
-                           prettify_name=stringify_tz,
-                           drawstyle='steps-post', ax=ax
-                           )
-
-        if thermal_zones is None:
-            suffix = ''
-        else:
-            suffix = '_' + '_'.join(map(str, thermal_zones))
-
-        # Save generated plots into datadir
-        figname = os.path.join(
-            self._trace.plots_dir,
-            '{}thermal_temperature{}.png'.format(
-                self._trace.plots_dir, self._trace.plots_prefix, suffix
-            )
-        )
-
-        pl.savefig(figname, bbox_inches='tight')
-
-    def plot_cpu_cooling_states(self, cpus=None, ax=None):
+    def plot_dev_freq_cooling_states(self, device, filepath=None, axis=None):
         """
-        Plot the state evolution of cpufreq cooling devices (all by default)
+        Plot the state evolution of a devfreq cooling device

-        Requires the following trace event:
-            - thermal_power_cpu_limit
-
-        :param cpus: list of CPUs to plot. Whole clusters can be controlled as
-        a single cooling device, they will be plotted as long as one of their
-        CPUs is in the list.
-        :type cpus: list(int)
+        :param device: The devfreq device to consider
+        :type device: str
         """
-        if not self._trace.hasEvents('thermal_power_cpu_limit'):
-            self._log.warning('Event [{}] not found, plot DISABLED!'
- .format('thermal_power_cpu_limit')) - return + local_fig = axis is None - plot_df = self._trace.df_events('thermal_power_cpu_limit') + if local_fig: + fig, axis = self.setup_plot() - def stringify_mask(mask): - return 'CPUs {}'.format(mask_to_list(mask)) + df = self.df_devfreq_cooling_state([device]) - # Find masks that match the requested CPUs - # This can include other CPUs - masks = None - if cpus is not None: - masks = self._matching_masks(cpus) + df.cdev_state.plot(drawstyle="steps-post", ax=axis, + label="Device \"{}\"".format(device)) - filters = None if masks is None else {'cpus' : masks} - _ax = self._plot_generic(plot_df, 'cpus', filters=filters, columns=['cdev_state'], - prettify_name=stringify_mask, - drawstyle='steps-post', ax=ax - ) - - if ax is None: - ax = _ax - - # Cdev status is an integer series - ax.yaxis.set_major_locator(MaxNLocator(integer=True)) - ax.grid(axis='y') - - if cpus is None: - suffix = '' - else: - suffix = '_' + '_'.join(map(str, cpus)) - - # Save generated plots into datadir - figname = os.path.join( - self._trace.plots_dir, - '{}thermal_cpufreq_cdev_state{}.png'.format( - self._trace.plots_dir, self._trace.plots_prefix, suffix - ) - ) - pl.savefig(figname, bbox_inches='tight') - - def plot_dev_freq_cooling_states(self, devices=None, ax=None): - """ - Plot the state evolution of devfreq cooling devices (all by default) + axis.legend() - Requires the following trace event: - - thermal_power_devfreq_limit + if local_fig: + axis.grid(True) + axis.set_title("devfreq cooling devices status") + axis.yaxis.set_major_locator(MaxNLocator(integer=True)) + axis.grid(axis='y') + axis.set_xlim(self._trace.x_min, self._trace.x_max) + self.save_plot(fig, filepath) - :param devices: list of devfreq devices to plot. - :type cpus: list(int) - """ - if not self._trace.hasEvents('thermal_power_devfreq_limit'): - self._log.warning('Event [{}] not found, plot DISABLED!' - .format('thermal_power_devfreq_limit')) - return - - plot_df = self._trace.df_events('thermal_power_devfreq_limit') - - # Might have more than one device selected by 'type', but that's - # the best we can do - filters = None if devices is None else {'type' : devices} - _ax = self._plot_generic(plot_df, 'type', filters=filters, columns=['cdev_state'], - drawstyle='steps-post', ax=ax - ) - - if ax is None: - ax = _ax - - # Cdev status is an integer series - ax.yaxis.set_major_locator(MaxNLocator(integer=True)) - ax.grid(axis='y') - - if devices is None: - suffix = '' - else: - suffix = '_' + '_'.join(map(str, devices)) - - # Save generated plots into datadir - figname = os.path.join( - self._trace.plots_dir, - '{}thermal_devfreq_cdev_state{}.png'.format( - self._trace.plots_dir, self._trace.plots_prefix, suffix - ) - ) - pl.savefig(figname, bbox_inches='tight') + return axis ############################################################################### # Utility Methods diff --git a/lisa/env.py b/lisa/env.py index 4d8c1d25f902a8374a28c5669001e14b6b7af7b3..27b0d3dc654b8724187887934113cd7debe087c0 100644 --- a/lisa/env.py +++ b/lisa/env.py @@ -268,10 +268,6 @@ class TestEnv(Loggable, HideExekallID): # computed when actually needed. 
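Since the thermal plotting methods above take an optional ``axis`` and return it, they compose onto a single figure; a minimal sketch (zone ID and CPU number are illustrative, ``trace`` is assumed as before)::

    import matplotlib.pyplot as plt

    fig, (temp_ax, cdev_ax) = plt.subplots(2, 1, sharex=True)

    # When an axis is passed in, the methods skip their own figure setup
    trace.analysis.thermal.plot_thermal_zone_temperature(0, axis=temp_ax)
    trace.analysis.thermal.plot_cpu_cooling_states(0, axis=cdev_ax)

    fig.savefig('thermal_overview.png', bbox_inches='tight')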
self.plat_info.add_target_src(self, fallback=True)
-
-        # Update the PlatformInfo with keys derived from the energy model
-        with contextlib.suppress(KeyError):
-            self.plat_info.add_nrg_model_src()
-
         logger.info('Effective platform information:\n%s', self.plat_info)

     @classmethod
diff --git a/lisa/platforms/platinfo.py b/lisa/platforms/platinfo.py
index aaa8ff7f3c2a2ea0e94ad0746c56aef73d0ac7e6..c61b745a1b6780f840245eb0d030ceef8434692a 100644
--- a/lisa/platforms/platinfo.py
+++ b/lisa/platforms/platinfo.py
@@ -15,18 +15,13 @@
 # limitations under the License.
 #

-import inspect
-import contextlib
-from collections import ChainMap
 from collections.abc import Mapping
-from numbers import Real

-from lisa.utils import HideExekallID, memoized, DeferredValue, IntRealDict, IntIntDict, StrIntListDict
+from lisa.utils import HideExekallID, memoized, DeferredValue, IntIntDict, IntListList, IntIntListDict, StrIntListDict
 from lisa.utils import MultiSrcConf, KeyDesc, LevelKeyDesc, TopLevelKeyDesc
 from lisa.energy_model import EnergyModel
 from lisa.wlgen.rta import RTA
-from trappy.stats.Topology import Topology

 from devlib.target import KernelVersion
 from devlib.exception import TargetStableError

@@ -55,17 +50,14 @@ class PlatformInfo(MultiSrcConf, HideExekallID):
             KeyDesc('calib', 'RTapp calibration dictionary', [IntIntDict]),
         )),
         KeyDesc('nrg-model', 'Energy model object', [EnergyModel]),
-        KeyDesc('cpu-capacities', 'Dictionary of CPU ID to capacity value', [IntRealDict]),
+        KeyDesc('cpu-capacities', 'Dictionary of CPU ID to capacity value', [IntIntDict]),
         KeyDesc('kernel-version', '', [KernelVersion]),
         KeyDesc('abi', 'ABI, e.g. "arm64"', [str]),
         KeyDesc('os', 'OS being used, e.g. "linux"', [str]),
         KeyDesc('name', 'Free-form name of the board', [str]),
-
-        # TODO remove that once no code depend on it anymore
-        KeyDesc('topology', 'Compat key: CPU topology', [Topology]),
-        KeyDesc('clusters', 'Compat key: dictionary of cluster names to list of CPU ID', [StrIntListDict]),
         KeyDesc('cpus-count', 'Compat key: number of CPUs', [int]),
-        KeyDesc('freqs', 'Compat key: dictionary of cluster names to list of frequencies', [StrIntListDict]),
+        KeyDesc('freq-domains', 'Frequency domains', [IntListList]),
+        KeyDesc('freqs', 'Dictionary of CPU ID to list of frequencies', [IntIntListDict]),
     ))
     """Some keys have a reserved meaning with an associated type."""

@@ -82,66 +74,17 @@ class PlatformInfo(MultiSrcConf, HideExekallID):
             'rtapp': {
                 # Since it is expensive to compute, use an on-demand DeferredValue
                 'calib': DeferredValue(RTA.get_cpu_calibrations, te)
-            }
-        }
-
-        if 'sched' in target.modules:
-            info['cpu-capacities'] = target.sched.get_capacities(default=1024)
-
-        return self.add_src(src, info, filter_none=True, **kwargs)
-
-    #TODO: kill that once code depending on this has been converted to
-    # using the appropriate "root" data, instead of these derived values.
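The reworked keys are plain CPU-indexed structures. A sketch of feeding them in manually (the no-argument :class:`PlatformInfo` constructor is an assumption; ``add_src`` usage follows the pattern above, and the values mirror the Juno test data further down)::

    from lisa.platforms.platinfo import PlatformInfo

    plat_info = PlatformInfo()
    plat_info.add_src('example', {
        'cpus-count': 6,
        # One list of CPU IDs per frequency domain
        'freq-domains': [[0, 3, 4, 5], [1, 2]],
        # Per-CPU frequency lists replace the old per-cluster dict
        'freqs': {
            0: [450000, 575000, 700000, 775000, 850000],
            1: [450000, 625000, 800000, 950000, 1100000],
        },
        'cpu-capacities': {0: 446, 1: 1024},
    })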
- def add_nrg_model_src(self, nrg_model=None, src='nrg-model', **kwargs): - # Derive all the deprecated keys from the nrg_model - nrg_model = nrg_model or self['nrg-model'] - node_groups = nrg_model.node_groups - - # Sort according to max capacity found in the group - def max_capacity(group): - return max( - s.capacity - for node in group - for s in node.active_states.values() - ) - node_groups = sorted(node_groups, key=max_capacity) - cpu_groups = [ - [node.cpu for node in group] - for group in node_groups - ] - - # big.LITTLE platform - if len(cpu_groups) == 2: - cluster_names = ['little', 'big'] - # SMP platform - else: - cluster_names = [str(i) for i in range(len(cpu_groups))] - clusters = { - name: group - for name, group in zip(cluster_names, cpu_groups) + }, + 'cpus-count': te.target.number_of_cpus } - topology = Topology(clusters=cpu_groups) - cpus_count = sum(len(group) for group in cpu_groups) + if hasattr(target, 'cpufreq'): + info['freq-domains'] = list(target.cpufreq.iter_domains()) + info['freqs'] = {cpu : target.cpufreq.list_frequencies(cpu) + for cpu in range(target.number_of_cpus)} - def freq_list(group): - return sorted(set( - freq - for node in group - for freq in node.active_states.keys() - )) - - freqs = { - cluster_name: freq_list(group) - for cluster_name, group in zip(cluster_names, node_groups) - } - - info = { - 'clusters': clusters, - 'topology': topology, - 'cpus-count': cpus_count, - 'freqs': freqs, - } + if hasattr(target, 'sched'): + info['cpu-capacities'] = target.sched.get_capacities(default=1024) return self.add_src(src, info, filter_none=True, **kwargs) diff --git a/lisa/tests/kernel/scheduler/eas_behaviour.py b/lisa/tests/kernel/scheduler/eas_behaviour.py index f5e6ecb4813feafaa95b37e0b198293def1e7938..efb382e6ab772a61a327f5dd7222538c3e736071 100644 --- a/lisa/tests/kernel/scheduler/eas_behaviour.py +++ b/lisa/tests/kernel/scheduler/eas_behaviour.py @@ -127,8 +127,8 @@ class EASBehaviour(RTATestBundle, abc.ABC): start_time = self.trace.start_time + self.trace.time_range for task in tasks: - pid = self.trace.getTaskByName(task) - assert len(pid) == 1, "getTaskByName returned more than one PID" + pid = self.trace.get_task_by_name(task) + assert len(pid) == 1, "get_task_by_name returned more than one PID" pid = pid[0] start_time = min(start_time, sdf[sdf.next_pid == pid].index[0]) diff --git a/lisa/tests/kernel/scheduler/load_tracking.py b/lisa/tests/kernel/scheduler/load_tracking.py index 9e1eb350f6e2fa478884d3424834604a7274a779..b926451d48c18a4c51271fa3772223c2f76624a3 100644 --- a/lisa/tests/kernel/scheduler/load_tracking.py +++ b/lisa/tests/kernel/scheduler/load_tracking.py @@ -154,34 +154,15 @@ class LoadTrackingBase(RTATestBundle, LoadTrackingHelpers): :returns: :class:`pandas.DataFrame` with a column for each signal for the workload task """ - # There are two different scheduler trace events that expose the load - # tracking signals. Neither of them is in mainline. Eventually they - # should be unified but for now we'll just check for both types of - # event. 
- # TODO: Add support for this parsing in Trappy and/or tasks_analysis - signal_fields = signals - if 'sched_load_avg_task' in trace.available_events: - event = 'sched_load_avg_task' - elif 'sched_load_se' in trace.available_events: - event = 'sched_load_se' - # sched_load_se uses 'util' and 'load' instead of 'util_avg' and - # 'load_avg' - signal_fields = [s.replace('_avg', '') for s in signals] - elif 'sched_pelt_se' in trace.available_events: - event = 'sched_pelt_se' - else: - raise ValueError('No sched_load_avg_task or sched_load_se or sched_pelt_se events. ' - 'Does the kernel support them?') - - df = trace.df_events(event) - df = df[df['comm'] == task_name][signal_fields] + df = trace.analysis.load_tracking.df_tasks_signals() + df = df[df['comm'] == task_name] window = self.get_task_window(trace, task_name, cpu) df = select_window(df, window) # Normalize the signal with the detected task execution start df.index -= window[0] - return df.rename(columns=dict(zip(signal_fields, signals))) + return df @staticmethod def is_almost_equal(target, value, allowed_delta_pct): @@ -271,7 +252,7 @@ class InvarianceBase(LoadTrackingBase): expected values :type allowed_error_pct: float """ - return self._test_signal('util_avg', allowed_error_pct) + return self._test_signal('util', allowed_error_pct) class CpuInvariance(InvarianceBase): """ @@ -396,7 +377,7 @@ class FreqInvarianceItem(InvarianceBase): expected values :type allowed_error_pct: float """ - return self._test_signal('load_avg', allowed_error_pct) + return self._test_signal('load', allowed_error_pct) class FreqInvariance(TestBundle, LoadTrackingHelpers): @@ -715,7 +696,7 @@ class PELTTask(LoadTrackingBase): :param allowed_error_pct: The allowed range difference """ - return self._test_range('util_avg', allowed_error_pct) + return self._test_range('util', allowed_error_pct) def test_load_avg_range(self, allowed_error_pct=15) -> ResultBundle: """ @@ -723,7 +704,7 @@ class PELTTask(LoadTrackingBase): :param allowed_error_pct: The allowed range difference """ - return self._test_range('load_avg', allowed_error_pct) + return self._test_range('load', allowed_error_pct) def test_util_avg_behaviour(self, error_margin_pct=5, allowed_error_pct=5)\ -> ResultBundle: @@ -736,7 +717,7 @@ class PELTTask(LoadTrackingBase): :param allowed_error_pct: How many PELT errors (determined by ``error_margin_pct```) are allowed """ - return self._test_behaviour('util_avg', error_margin_pct, allowed_error_pct) + return self._test_behaviour('util', error_margin_pct, allowed_error_pct) def test_load_avg_behaviour(self, error_margin_pct=5, allowed_error_pct=5)\ -> ResultBundle: @@ -749,6 +730,4 @@ class PELTTask(LoadTrackingBase): :param allowed_error_pct: How many PELT errors (determined by ``error_margin_pct```) are allowed """ - return self._test_behaviour('load_avg', error_margin_pct, allowed_error_pct) - - + return self._test_behaviour('load', error_margin_pct, allowed_error_pct) diff --git a/lisa/tests/kernel/scheduler/misfit.py b/lisa/tests/kernel/scheduler/misfit.py index 0765fa7c9d2477f5d9ed44bd2e56dbb042e652bb..a10c3d87386042925e3101fea004e943ab4cf3c8 100644 --- a/lisa/tests/kernel/scheduler/misfit.py +++ b/lisa/tests/kernel/scheduler/misfit.py @@ -24,6 +24,7 @@ from lisa.trace import Trace from lisa.wlgen.rta import Periodic from lisa.tests.kernel.test_bundle import RTATestBundle, Result, ResultBundle, CannotCreateError, TestMetric from lisa.env import TestEnv +from lisa.analysis.tasks import TaskState class MisfitMigrationBase(RTATestBundle): """ @@ 
-202,14 +203,12 @@ class StaggeredFinishes(MisfitMigrationBase): return profile - def _trim_lat_df(self, lat_df): - if lat_df.empty: - return lat_df + def _trim_state_df(self, state_df): + if state_df.empty: + return state_df - lat_df = Trace.squash_df(lat_df, self.start_time, - lat_df.index[-1] + lat_df.t_delta.values[-1], "t_delta") - # squash_df only updates t_delta, remove t_start to make sure it's not used - return lat_df.drop('t_start', 1) + return Trace.squash_df(state_df, self.start_time, + state_df.index[-1] + state_df.delta.values[-1], "delta") def test_preempt_time(self, allowed_preempt_pct=1) -> ResultBundle: """ @@ -217,13 +216,13 @@ class StaggeredFinishes(MisfitMigrationBase): """ sdf = self.trace.df_events('sched_switch') - latency_dfs = { - task : self.trace.analysis.latency.df_latency(task) + task_state_dfs = { + task : self.trace.analysis.tasks.df_task_states(task) for task in self.rtapp_profile.keys() } res = ResultBundle.from_bool(True) - for task, lat_df in latency_dfs.items(): + for task, state_df in task_state_dfs.items(): # The sched_switch dataframe where the misfit task # is replaced by another misfit task preempt_sdf = sdf[ @@ -231,15 +230,15 @@ class StaggeredFinishes(MisfitMigrationBase): (sdf.next_comm.str.startswith(self.task_prefix)) ] - lat_df = self._trim_lat_df( - lat_df[ - (lat_df.index.isin(preempt_sdf.index)) & + state_df = self._trim_state_df( + state_df[ + (state_df.index.isin(preempt_sdf.index)) & # Ensure this is a preemption and not just the task ending - (lat_df.curr_state == "S") + (state_df.curr_state == TaskState.TASK_INTERRUPTIBLE.char) ] ) - preempt_time = lat_df.t_delta.sum() + preempt_time = state_df.delta.sum() preempt_pct = (preempt_time / self.duration) * 100 res.add_metric("{} preemption".format(task), { @@ -257,7 +256,7 @@ class StaggeredFinishes(MisfitMigrationBase): :returns: A dataframe that describes the idle status (on/off) of 'cpu' """ active_df = pd.DataFrame(self.trace.getCPUActiveSignal(cpu), columns=['state']) - self.trace.addEventsDeltas(active_df) + self.trace.add_events_deltas(active_df) return active_df def _max_idle_time(self, start, end, cpus): @@ -282,17 +281,17 @@ class StaggeredFinishes(MisfitMigrationBase): return max_time, max_cpu - def _test_cpus_busy(self, latency_dfs, cpus, allowed_idle_time_s): + def _test_cpus_busy(self, task_state_dfs, cpus, allowed_idle_time_s): """ Test that for every window in which the tasks are running, :attr:`cpus` are not idle for more than :attr:`allowed_idle_time_s` """ res = ResultBundle.from_bool(True) - for task, lat_df in latency_dfs.items(): + for task, state_df in task_state_dfs.items(): # Have a look at every task activation - task_idle_times = [self._max_idle_time(index, index + row.t_delta, cpus) - for index, row in lat_df.iterrows()] + task_idle_times = [self._max_idle_time(index, index + row.delta, cpus) + for index, row in state_df.iterrows()] if not task_idle_times: continue @@ -322,22 +321,21 @@ class StaggeredFinishes(MisfitMigrationBase): first migration. 
""" - latency_dfs = {} + task_state_dfs = {} for task in self.rtapp_profile.keys(): - df = self.trace.analysis.latency.df_latency(task) - df = self._trim_lat_df(df[ - # Task is active - df.curr_state == "A" + df = self.trace.analysis.tasks.df_task_states(task) + df = self._trim_state_df(df[ + df.curr_state == TaskState.TASK_ACTIVE.char ]) # The first time the task runs on a big - first_big = df[df["__cpu"].isin(self.dst_cpus)].index[0] + first_big = df[df.cpu.isin(self.dst_cpus)].index[0] - df = df[df["__cpu"].isin(self.src_cpus)] + df = df[df.cpu.isin(self.src_cpus)] - latency_dfs[task] = df[:first_big] + task_state_dfs[task] = df[:first_big] - return self._test_cpus_busy(latency_dfs, self.dst_cpus, allowed_delay_s) + return self._test_cpus_busy(task_state_dfs, self.dst_cpus, allowed_delay_s) def test_throughput(self, allowed_idle_time_s=0.001) -> ResultBundle: """ @@ -350,16 +348,16 @@ class StaggeredFinishes(MisfitMigrationBase): pass. :type allowed_idle_time_s: int """ - latency_dfs = {} + task_state_dfs = {} for task in self.rtapp_profile.keys(): # This test is all about throughput: check that every time a task # runs on a little it's because bigs are busy - df = self.trace.analysis.latency.df_latency(task) - latency_dfs[task] = self._trim_lat_df(df[ + df = self.trace.analysis.tasks.df_task_states(task) + task_state_dfs[task] = self._trim_state_df(df[ # Task is active - (df.curr_state == "A") & + (df.curr_state == TaskState.TASK_ACTIVE.char) & # Task needs to be upmigrated - (df["__cpu"].isin(self.src_cpus)) + (df.cpu.isin(self.src_cpus)) ]) - return self._test_cpus_busy(latency_dfs, self.dst_cpus, allowed_idle_time_s) + return self._test_cpus_busy(task_state_dfs, self.dst_cpus, allowed_idle_time_s) diff --git a/lisa/tests/lisa/test_trace.py b/lisa/tests/lisa/test_trace.py index 7a9791e3e2b21b3d69e1070dbcb307a2d2849c67..4cea9bb7956d97e65b8297b6be46e80dc8949d86 100644 --- a/lisa/tests/lisa/test_trace.py +++ b/lisa/tests/lisa/test_trace.py @@ -79,23 +79,23 @@ class TestTrace(StorageTestCase): path = os.path.join(trace_dir, 'plat_info.yml') return PlatformInfo.from_yaml_map(path) - def test_getTaskByName(self): - """TestTrace: getTaskByName() returns the list of PIDs for all tasks with the specified name""" + def test_get_task_by_name(self): + """TestTrace: get_task_by_name() returns the list of PIDs for all tasks with the specified name""" for name, pids in [('watchdog/0', [12]), ('sh', [1642, 1702, 1717, 1718]), ('NOT_A_TASK', [])]: - self.assertEqual(self.trace.getTaskByName(name), pids) + self.assertEqual(self.trace.get_task_by_name(name), pids) - def test_getTaskByPid(self): - """TestTrace: getTaskByPid() returns the name of the task with the specified PID""" + def test_get_task_by_pid(self): + """TestTrace: get_task_by_pid() returns the name of the task with the specified PID""" for pid, names in [(15, 'watchdog/1'), (1639, 'sshd'), (987654321, None)]: - self.assertEqual(self.trace.getTaskByPid(pid), names) + self.assertEqual(self.trace.get_task_by_pid(pid), names) - def test_getTasks(self): - """TestTrace: getTasks() returns a dictionary mapping PIDs to a single task name""" - tasks_dict = self.trace.getTasks() + def test_get_tasks(self): + """TestTrace: get_tasks() returns a dictionary mapping PIDs to a single task name""" + tasks_dict = self.trace.get_tasks() for pid, name in [(1, 'init'), (9, 'rcu_sched'), (1383, 'jbd2/sda2-8')]: @@ -109,9 +109,9 @@ class TestTrace(StorageTestCase): """ trace = self.make_trace(in_data) - self.assertEqual(trace.getTaskByPid(1234), 'father') - 
self.assertEqual(trace.getTaskByPid(5678), 'child') - self.assertEqual(trace.getTaskByName('father'), [1234]) + self.assertEqual(trace.get_task_by_pid(1234), 'father') + self.assertEqual(trace.get_task_by_pid(5678), 'child') + self.assertEqual(trace.get_task_by_name('father'), [1234]) def test_time_range(self): """ @@ -242,7 +242,7 @@ class TestTrace(StorageTestCase): """ trace = self.make_trace(in_data) - trace.analysis.idle.plot_cpu_idle_state_residency() + trace.analysis.idle.plot_cpu_idle_state_residency(0) def test_deriving_cpus_count(self): """Test that Trace derives cpus_count if it isn't provided""" @@ -259,7 +259,7 @@ class TestTrace(StorageTestCase): self.assertEqual(trace.cpus_count, 3) - def test_df_cpu_wakeups(self): + def test_df_cpus_wakeups(self): """ Test the cpu_wakeups DataFrame getter """ @@ -276,14 +276,14 @@ class TestTrace(StorageTestCase): -0 [004] 519.023080: cpu_idle: state=1 cpu_id=4 """) - df = trace.analysis.cpus.df_cpu_wakeups() + df = trace.analysis.idle.df_cpus_wakeups() exp_index=[519.021928, 519.022641, 519.022642, 519.022643, 519.022867] exp_cpus= [ 4, 4, 1, 2, 3] self.assertListEqual(df.index.tolist(), exp_index) self.assertListEqual(df.cpu.tolist(), exp_cpus) - df = trace.analysis.cpus.df_cpu_wakeups([2]) + df = df[df.cpu == 2] self.assertListEqual(df.index.tolist(), [519.022643]) self.assertListEqual(df.cpu.tolist(), [2]) @@ -292,29 +292,18 @@ class TestTrace(StorageTestCase): """Helper for smoke testing _dfg methods in tasks_analysis""" trace = self.get_trace(trace_name) - lt_df = trace.analysis.tasks.df_load() - columns = ['comm', 'pid', 'load_avg', 'util_avg', 'cpu'] - if trace.has_big_little: - columns += ['cluster'] - if 'nrg-model' in trace.plat_info: - columns += ['min_cluster_cap'] + lt_df = trace.analysis.load_tracking.df_tasks_signals() + columns = ['comm', 'pid', 'load', 'util', '__cpu'] for column in columns: msg = 'Task signals parsed from {} missing {} column'.format( trace.data_dir, column) self.assertIn(column, lt_df, msg=msg) - if trace.has_big_little: - df = trace.analysis.tasks.df_top_big_tasks(min_samples=1) - for column in ['samples', 'comm']: - msg = 'Big tasks parsed from {} missing {} column'.format( - trace.data_dir, column) - self.assertIn(column, df, msg=msg) - # Pick an arbitrary PID to try plotting signals for. pid = lt_df['pid'].unique()[0] # Call plot - although we won't check the results we can just check # that things aren't totally borken. 
- trace.analysis.tasks.plot_tasks(tasks=[pid]) + trace.analysis.load_tracking.plot_task_signals(pid) def test_sched_load_signals(self): """Test parsing sched_load_se events from EAS upstream integration""" @@ -364,7 +353,7 @@ class TestTraceNoClusterData(TestTrace): def _get_plat_info(self, trace_name=None): plat_info = super(TestTraceNoClusterData, self)._get_plat_info(trace_name) plat_info = copy.copy(plat_info) - plat_info.force_src('clusters', ['SOURCE THAT DOES NOT EXISTS']) + plat_info.force_src('freq-domains', ['SOURCE THAT DOES NOT EXISTS']) return plat_info class TestTraceNoPlatform(TestTrace): diff --git a/lisa/tests/lisa/traces/plat_info.yml b/lisa/tests/lisa/traces/plat_info.yml index 80fd5d1a710b0e0c27a8979704cba7269da4e7cd..e84ef5dae36f5bba75f688f70dbb3ae0424bebc5 100644 --- a/lisa/tests/lisa/traces/plat_info.yml +++ b/lisa/tests/lisa/traces/plat_info.yml @@ -1,27 +1,355 @@ platform-info: conf: - os: linux abi: arm64 - clusters: - big: [1, 2] - little: [0, 3, 4, 5] + cpu-capacities: + 0: 446 + 1: 1024 + 2: 1024 + 3: 446 + 4: 446 + 5: 446 cpus-count: 6 - topology: !call:trappy.stats.Topology.Topology - clusters: [[0, 3, 4, 5], [1, 2]] + freq-domains: + - - 0 + - 3 + - 4 + - 5 + - - 1 + - 2 freqs: - big: [ - 450000, - 625000, - 800000, - 950000, - 1100000 - ] - little: [ - 450000, - 575000, - 700000, - 775000, - 850000 - ] - kernel-version: !call:devlib.target.KernelVersion - version_string: "4.11.0-rc6-00091-g48e946ac4235-dirty #67 SMP PREEMPT Mon Jul 3 16:23:08 BST 2017" + 0: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 1: + - 450000 + - 625000 + - 800000 + - 950000 + - 1100000 + 2: + - 450000 + - 625000 + - 800000 + - 950000 + - 1100000 + 3: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 4: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 5: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + kernel-version: !!python/object:devlib.target.KernelVersion + major: 19 + minor: 0 + parts: !!python/tuple + - 4 + - 19 + - 0 + rc: null + release: 4.19.0-07801-gf317706 + sha1: f317706 + version: 38 SMP PREEMPT Fri Nov 30 13:55:54 GMT 2018 + version_number: 4 + name: juno + nrg-model: !!python/object:lisa.energy_model.EnergyModel + cpu_nodes: + - &id001 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 0 + cpus: !!python/tuple + - 0 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu0 + parent: &id002 !!python/object:lisa.energy_model.EnergyModelRoot + active_states: null + children: + - *id001 + - &id003 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 418.90909090909093 + - 160 + - 625000: !!python/object/new:lisa.energy_model.ActiveState + - 581.8181818181819 + - 239 + - 800000: !!python/object/new:lisa.energy_model.ActiveState + - 744.7272727272727 + - 343 + - 950000: !!python/object/new:lisa.energy_model.ActiveState + - 884.3636363636364 + - 454 + - 1100000: !!python/object/new:lisa.energy_model.ActiveState + - 1024.0 + - 583 + children: 
[] + cpu: 1 + cpus: !!python/tuple + - 1 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu1 + parent: *id002 + - &id004 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 418.90909090909093 + - 160 + - 625000: !!python/object/new:lisa.energy_model.ActiveState + - 581.8181818181819 + - 239 + - 800000: !!python/object/new:lisa.energy_model.ActiveState + - 744.7272727272727 + - 343 + - 950000: !!python/object/new:lisa.energy_model.ActiveState + - 884.3636363636364 + - 454 + - 1100000: !!python/object/new:lisa.energy_model.ActiveState + - 1024.0 + - 583 + children: [] + cpu: 2 + cpus: !!python/tuple + - 2 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu2 + parent: *id002 + - &id005 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 3 + cpus: !!python/tuple + - 3 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu3 + parent: *id002 + - &id006 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 4 + cpus: !!python/tuple + - 4 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu4 + parent: *id002 + - &id007 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 5 + cpus: !!python/tuple + - 5 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu5 + parent: *id002 + cpu: null + cpus: &id015 !!python/tuple + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + idle_states: null + name: null + parent: null + - *id003 + - *id004 + - *id005 + - *id006 + - *id007 + cpu_pds: + - &id008 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 0 + cpus: !!python/tuple + - 0 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: &id009 !!python/object:lisa.energy_model.PowerDomain + children: + - *id008 + - &id010 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 1 + cpus: !!python/tuple + - 1 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + 
parent: *id009 + - &id011 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 2 + cpus: !!python/tuple + - 2 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id012 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 3 + cpus: !!python/tuple + - 3 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id013 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 4 + cpus: !!python/tuple + - 4 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id014 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 5 + cpus: !!python/tuple + - 5 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + cpu: null + cpus: !!python/tuple + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + idle_states: [] + name: null + parent: null + - *id010 + - *id011 + - *id012 + - *id013 + - *id014 + cpus: *id015 + freq_domains: + - - 1 + - 2 + - - 0 + - 3 + - 4 + - 5 + pd: *id009 + root: *id002 + os: linux + rtapp: + calib: + 0: 660 + 1: 313 + 2: 313 + 3: 660 + 4: 660 + 5: 661 diff --git a/lisa/tests/lisa/traces/sched_load/plat_info.yml b/lisa/tests/lisa/traces/sched_load/plat_info.yml index b1af466251bfce0810740a973ce0f8d4459fcb9b..e84ef5dae36f5bba75f688f70dbb3ae0424bebc5 100644 --- a/lisa/tests/lisa/traces/sched_load/plat_info.yml +++ b/lisa/tests/lisa/traces/sched_load/plat_info.yml @@ -1,28 +1,355 @@ platform-info: conf: - os: linux abi: arm64 - clusters: - big: [1, 2] - little: [0, 3, 4, 5] + cpu-capacities: + 0: 446 + 1: 1024 + 2: 1024 + 3: 446 + 4: 446 + 5: 446 cpus-count: 6 + freq-domains: + - - 0 + - 3 + - 4 + - 5 + - - 1 + - 2 freqs: - big: [ - 450000, - 625000, - 800000, - 950000, - 1100000 - ] - little: [ - 450000, - 575000, - 700000, - 775000, - 850000 - ] - kernel-version: !call:devlib.target.KernelVersion - version_string: "4.11.0-rc6-00092-g9cc3141d9e4f-dirty #58 SMP PREEMPT Wed May 24 18:37:50 BST 2017" - - topology: !call:trappy.stats.Topology.Topology - clusters: [[0, 3, 4, 5], [1, 2]] + 0: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 1: + - 450000 + - 625000 + - 800000 + - 950000 + - 1100000 + 2: + - 450000 + - 625000 + - 800000 + - 950000 + - 1100000 + 3: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 4: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + 5: + - 450000 + - 575000 + - 700000 + - 775000 + - 850000 + kernel-version: !!python/object:devlib.target.KernelVersion + major: 19 + minor: 0 + parts: !!python/tuple + - 4 + - 19 + - 0 + rc: null + release: 4.19.0-07801-gf317706 + sha1: f317706 + version: 38 SMP PREEMPT Fri Nov 30 13:55:54 GMT 2018 + version_number: 4 + name: juno + nrg-model: !!python/object:lisa.energy_model.EnergyModel + cpu_nodes: + - &id001 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 0 + cpus: !!python/tuple + - 0 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu0 + parent: &id002 
!!python/object:lisa.energy_model.EnergyModelRoot + active_states: null + children: + - *id001 + - &id003 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 418.90909090909093 + - 160 + - 625000: !!python/object/new:lisa.energy_model.ActiveState + - 581.8181818181819 + - 239 + - 800000: !!python/object/new:lisa.energy_model.ActiveState + - 744.7272727272727 + - 343 + - 950000: !!python/object/new:lisa.energy_model.ActiveState + - 884.3636363636364 + - 454 + - 1100000: !!python/object/new:lisa.energy_model.ActiveState + - 1024.0 + - 583 + children: [] + cpu: 1 + cpus: !!python/tuple + - 1 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu1 + parent: *id002 + - &id004 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 418.90909090909093 + - 160 + - 625000: !!python/object/new:lisa.energy_model.ActiveState + - 581.8181818181819 + - 239 + - 800000: !!python/object/new:lisa.energy_model.ActiveState + - 744.7272727272727 + - 343 + - 950000: !!python/object/new:lisa.energy_model.ActiveState + - 884.3636363636364 + - 454 + - 1100000: !!python/object/new:lisa.energy_model.ActiveState + - 1024.0 + - 583 + children: [] + cpu: 2 + cpus: !!python/tuple + - 2 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu2 + parent: *id002 + - &id005 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 3 + cpus: !!python/tuple + - 3 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu3 + parent: *id002 + - &id006 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 4 + cpus: !!python/tuple + - 4 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu4 + parent: *id002 + - &id007 !!python/object:lisa.energy_model.EnergyModelNode + active_states: !!omap + - 450000: !!python/object/new:lisa.energy_model.ActiveState + - 236.11764705882354 + - 42 + - 575000: !!python/object/new:lisa.energy_model.ActiveState + - 301.70588235294116 + - 58 + - 700000: !!python/object/new:lisa.energy_model.ActiveState + - 367.29411764705884 + - 79 + - 775000: !!python/object/new:lisa.energy_model.ActiveState + - 406.6470588235294 + - 97 + - 850000: !!python/object/new:lisa.energy_model.ActiveState + - 446.0 + - 119 + children: [] + cpu: 5 + cpus: !!python/tuple + - 5 + idle_states: !!omap + - WFI: 0 + - cpu-sleep-0: 0 + - cluster-sleep-0: 0 + name: cpu5 + parent: *id002 + 
cpu: null + cpus: &id015 !!python/tuple + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + idle_states: null + name: null + parent: null + - *id003 + - *id004 + - *id005 + - *id006 + - *id007 + cpu_pds: + - &id008 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 0 + cpus: !!python/tuple + - 0 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: &id009 !!python/object:lisa.energy_model.PowerDomain + children: + - *id008 + - &id010 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 1 + cpus: !!python/tuple + - 1 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id011 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 2 + cpus: !!python/tuple + - 2 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id012 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 3 + cpus: !!python/tuple + - 3 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id013 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 4 + cpus: !!python/tuple + - 4 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + - &id014 !!python/object:lisa.energy_model.PowerDomain + children: [] + cpu: 5 + cpus: !!python/tuple + - 5 + idle_states: + - WFI + - cpu-sleep-0 + - cluster-sleep-0 + name: null + parent: *id009 + cpu: null + cpus: !!python/tuple + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + idle_states: [] + name: null + parent: null + - *id010 + - *id011 + - *id012 + - *id013 + - *id014 + cpus: *id015 + freq_domains: + - - 1 + - 2 + - - 0 + - 3 + - 4 + - 5 + pd: *id009 + root: *id002 + os: linux + rtapp: + calib: + 0: 660 + 1: 313 + 2: 313 + 3: 660 + 4: 660 + 5: 661 diff --git a/lisa/tests/lisa/traces/sched_load_avg/plat_info.yml b/lisa/tests/lisa/traces/sched_load_avg/plat_info.yml index 23578d8c8c1d8aa96b921a0401ee6ee693d2e63b..807720c9923be01d65ed169729abc88019bbfd40 100644 --- a/lisa/tests/lisa/traces/sched_load_avg/plat_info.yml +++ b/lisa/tests/lisa/traces/sched_load_avg/plat_info.yml @@ -2,20 +2,18 @@ platform-info: conf: os: android abi: arm64 - clusters: - big: [2, 3] - little: [0, 1] cpus-count: 4 + freq-domains: [[0, 1], [2, 3]] freqs: - big: [ + 0: [ 307200, 384000, 460800, 537600, 614400, 691200, - 748800, - 825600, + 768000, + 844800, 902400, 979200, 1056000, @@ -25,24 +23,17 @@ platform-info: 1363200, 1440000, 1516800, - 1593600, - 1670400, - 1747200, - 1824000, - 1900800, - 1977600, - 2054400, - 2150400 - ] - little: [ + 1593600 + ] + 1: [ 307200, 384000, 460800, 537600, 614400, 691200, - 768000, - 844800, + 748800, + 825600, 902400, 979200, 1056000, @@ -52,10 +43,14 @@ platform-info: 1363200, 1440000, 1516800, - 1593600 - ] + 1593600, + 1670400, + 1747200, + 1824000, + 1900800, + 1977600, + 2054400, + 2150400 + ] kernel-version: !call:devlib.target.KernelVersion version_string: "3.18.31-gbd96fbf #1 SMP PREEMPT Mon Nov 7 20:29:14 UTC 2016" - - topology: !call:trappy.stats.Topology.Topology - clusters: [[0, 1], [2, 3]] diff --git a/lisa/trace.py b/lisa/trace.py index 7786d818c28f29c26026b1ad4523689fb84222c4..86a08cc8bcbe2ed98e2cdb2b33454c72ffea577b 100644 --- a/lisa/trace.py +++ b/lisa/trace.py @@ -125,7 +125,12 @@ class Trace(Loggable): # By deafult, use the trace dir to save plots self.plots_dir = plots_dir if self.plots_dir is None: - self.plots_dir = self.data_dir + # In case we're passed the trace.dat + if os.path.isfile(data_dir): + self.plots_dir = 
os.path.dirname(data_dir) + else: + self.plots_dir = data_dir + self.plots_prefix = plots_prefix self.__registerTraceEvents(events) @@ -333,7 +338,7 @@ class Trace(Loggable): name_key : 'TaskName'}) .set_index('PID').sort_index()) - def getTaskByName(self, name): + def get_task_by_name(self, name): """ Get the PIDs of all tasks with the specified name. @@ -354,7 +359,7 @@ class Trace(Loggable): return (self._tasks_by_pid[self._tasks_by_pid.TaskName == name] .index.tolist()) - def getTaskByPid(self, pid): + def get_task_by_pid(self, pid): """ Get the name of the task with the specified PID. @@ -377,7 +382,27 @@ class Trace(Loggable): except KeyError: return None - def getTasks(self): + def get_task_pid(self, task): + """ + Helper that takes either a name or a PID and always returns a PID + + :param task: Either the task name or the task PID + :type task: int or str + """ + if isinstance(task, str): + pid_list = self.get_task_by_name(task) + if len(pid_list) > 1: + self.get_logger().warning( + "More than one PID found for task {}, " + "using the first one ({})".format(task, pid_list[0])) + pid = pid_list[0] + else: + pid = task + + return pid + + + def get_tasks(self): """ Get a dictionary of all the tasks in the Trace. @@ -465,13 +490,6 @@ class Trace(Loggable): ############################################################################### # Trace Events Sanitize Methods ############################################################################### - @property - def has_big_little(self): - return ('clusters' in self.plat_info - and 'big' in self.plat_info['clusters'] - and 'little' in self.plat_info['clusters'] - and 'nrg-model' in self.plat_info) - def _sanitize_SchedCpuCapacity(self): """ Add more columns to cpu_capacity data frame if the energy model is @@ -607,7 +625,7 @@ class Trace(Loggable): return df = self.df_events('sched_overutilized') - self.addEventsDeltas(df, 'len') + self.add_events_deltas(df, 'len') # Build a stat on trace overutilization self.overutilized_time = df[df.overutilized == 1].len.sum() @@ -664,7 +682,7 @@ class Trace(Loggable): """ logger = self.get_logger() if not self.hasEvents('cpu_frequency_devlib') \ - or 'clusters' not in self.plat_info: + or 'freq-domains' not in self.plat_info: return devlib_freq = self.df_events('cpu_frequency_devlib') @@ -672,7 +690,7 @@ class Trace(Loggable): devlib_freq.rename(columns={'state':'frequency'}, inplace=True) df = self.df_events('cpu_frequency') - clusters = self.plat_info['clusters'] + domains = self.plat_info['freq-domains'] # devlib always introduces fake cpu_frequency events, in case the # OS has not generated cpu_frequency envets there are the only @@ -697,29 +715,29 @@ class Trace(Loggable): # Inject "initial" devlib frequencies os_df = df dl_df = devlib_freq.iloc[:self.cpus_count] - for _,c in self.plat_info['clusters'].items(): - dl_freqs = dl_df[dl_df.cpu.isin(c)] - os_freqs = os_df[os_df.cpu.isin(c)] - logger.debug("First freqs for %s:\n%s", c, dl_freqs) + for cpus in domains: + dl_freqs = dl_df[dl_df.cpu.isin(cpus)] + os_freqs = os_df[os_df.cpu.isin(cpus)] + logger.debug("First freqs for %s:\n%s", cpus, dl_freqs) # All devlib events "before" os-generated events logger.debug("Min os freq @: %s", os_freqs.index.min()) if os_freqs.empty or \ os_freqs.index.min() > dl_freqs.index.max(): - logger.debug("Insert devlib freqs for %s", c) + logger.debug("Insert devlib freqs for %s", cpus) df = pd.concat([dl_freqs, df]) # Inject "final" devlib frequencies os_df = df dl_df = devlib_freq.iloc[self.cpus_count:] - for _,c 
in self.plat_info['clusters'].items():
-            dl_freqs = dl_df[dl_df.cpu.isin(c)]
-            os_freqs = os_df[os_df.cpu.isin(c)]
-            logger.debug("Last freqs for %s:\n%s", c, dl_freqs)
+        for cpus in domains:
+            dl_freqs = dl_df[dl_df.cpu.isin(cpus)]
+            os_freqs = os_df[os_df.cpu.isin(cpus)]
+            logger.debug("Last freqs for %s:\n%s", cpus, dl_freqs)
             # All devlib events "after" os-generated events
             logger.debug("Max os freq @: %s", os_freqs.index.max())
             if os_freqs.empty or \
                os_freqs.index.max() < dl_freqs.index.min():
-                logger.debug("Append devlib freqs for %s", c)
+                logger.debug("Append devlib freqs for %s", cpus)
                 df = pd.concat([df, dl_freqs])

         df.sort_index(inplace=True)
@@ -727,7 +745,7 @@ class Trace(Loggable):
         setattr(self.ftrace.cpu_frequency, 'data_frame', df)

         # Frequency Coherency Check
-        for _, cpus in clusters.items():
+        for cpus in domains:
             cluster_df = df[df.cpu.isin(cpus)]
             for chunk in self._chunker(cluster_df, len(cpus)):
                 f = chunk.iloc[0].frequency
@@ -834,50 +852,6 @@ class Trace(Loggable):
         # Fix sequences of wakeup/sleep events reported with the same index
         return handle_duplicate_index(cpu_active)

-    @memoized
-    def getClusterActiveSignal(self, cluster):
-        """
-        Build a square wave representing the active (i.e. non-idle) cluster
-        time, i.e.:
-
-          cluster_active[t] == 1 if at least one CPU is reported to be non-idle
-          by CPUFreq at time t
-          cluster_active[t] == 0 otherwise
-
-        :param cluster: list of CPU IDs belonging to a cluster
-        :type cluster: list(int)
-
-        :returns: A :class:`pandas.Series` or ``None`` if the trace contains no
-                  "cpu_idle" events
-        """
-        if not self.hasEvents('cpu_idle'):
-            self.get_logger().warning('Events [cpu_idle] not found, '
-                                      'cannot compute cluster active signal!')
-            return None
-
-        active = self.getCPUActiveSignal(cluster[0]).to_frame(name=cluster[0])
-        for cpu in cluster[1:]:
-            active = active.join(
-                self.getCPUActiveSignal(cpu).to_frame(name=cpu),
-                how='outer'
-            )
-
-        active.fillna(method='ffill', inplace=True)
-        # There might be NaNs in the signal where we got data from some CPUs
-        # before others. That will break the .astype(int) below, so drop rows
-        # with NaN in them.
-        active.dropna(inplace=True)
-
-        # Cluster active is the OR between the actives on each CPU
-        # belonging to that specific cluster
-        cluster_active = reduce(
-            operator.or_,
-            [cpu_active.astype(int) for _, cpu_active in
-             active.items()]
-        )
-
-        return cluster_active
-
     @memoized
     def getPeripheralClockEffectiveRate(self, clk_name):
         logger = self.get_logger()
@@ -913,11 +887,22 @@ class Trace(Loggable):
                                 np.where(freq['state'] == 1, freq['rate'], float('nan')))
         return freq

-    def addEventsDeltas(self, df, col_name='delta'):
+    def add_events_deltas(self, df, col_name='delta', inplace=True):
         """
-        Compute the time between each event in a dataframe, and store it in a
-        new column. This only really makes sense for events tracking an
-        on/off state (e.g. overutilized, idle)
+        Store the time between each event in a new dataframe column
+
+        :param df: The DataFrame to operate on
+        :type df: pandas.DataFrame
+
+        :param col_name: The name of the column to add
+        :type col_name: str
+
+        :param inplace: Whether to operate on the passed DataFrame, or to use
+          a copy of it
+        :type inplace: bool
+
+        This method only really makes sense for events tracking an on/off state
+        (e.g. overutilized, idle)
         """
         if df.empty:
             return df
@@ -926,14 +911,18 @@ class Trace(Loggable):
             raise RuntimeError("Column {} is already present in the dataframe".
format(col_name)) - df['start'] = df.index - df[col_name] = (df.start - df.start.shift()).fillna(0).shift(-1) - df.drop('start', axis=1, inplace=True) + if not inplace: + df = df.copy() + + time_df = pd.DataFrame(index=df.index, data=df.index.values, columns=["start"]) + df[col_name] = (time_df.start - time_df.start.shift()).fillna(0).shift(-1) # Fix the last event, which will have a NaN duration # Set duration to trace_end - last_event df.loc[df.index[-1], col_name] = self.start_time + self.time_range - df.index[-1] + return df + @staticmethod def squash_df(df, start, end, column='delta'): """ @@ -942,7 +931,7 @@ class Trace(Loggable): The input dataframe is expected to have a "column" which reports the time delta between consecutive rows, as for example dataframes - generated by addEventsDeltas(). + generated by add_events_deltas(). The returned dataframe is granted to have an initial and final event at the specified "start" ("end") index values, which values diff --git a/lisa/utils.py b/lisa/utils.py index 3b1f8fc9f062589af006e866714799224f51d5a0..a796649235e7db8974245b3a5975eaf8a5f3d5c9 100644 --- a/lisa/utils.py +++ b/lisa/utils.py @@ -1118,12 +1118,15 @@ class TypedList(GenericContainerBase, list, metaclass=GenericSequenceMeta): class IntIntDict(TypedDict): _type = (int, int) -class IntRealDict(TypedDict): - _type = (int, numbers.Real) - class IntList(TypedList): _type = int +class IntIntListDict(TypedDict): + _type = (int, IntList) + +class IntListList(TypedList): + _type = IntList + class StrList(TypedList): _type = str
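A sketch tying the reworked :class:`Trace` helpers together (``trace`` assumed as before; the window bounds are illustrative)::

    from lisa.trace import Trace

    # get_task_pid() accepts a task name or a PID, warning on ambiguity
    pid = trace.get_task_pid('sh')

    # Annotate an on/off event dataframe with per-event durations;
    # inplace=False leaves the input dataframe untouched
    df = trace.df_events('sched_overutilized')
    df = trace.add_events_deltas(df, col_name='len', inplace=False)

    # Clip the annotated dataframe to a [start, end] window
    df = Trace.squash_df(df, 1.0, 2.0, column='len')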