diff --git a/doc/trace_analysis.rst b/doc/trace_analysis.rst
index b9044f7c5bd0b9a8f1fa38c7312263056507048a..3fe70732b97bcd61d98c5de3e352774e72bb68a9 100644
--- a/doc/trace_analysis.rst
+++ b/doc/trace_analysis.rst
@@ -206,3 +206,9 @@ Trace parsers
 
 .. autoclass:: lisa.trace.TrappyTraceParser
     :members:
+
+EAS
++++++++
+
+.. automodule:: lisa.analysis.eas
+    :members:
diff --git a/lisa/analysis/eas.py b/lisa/analysis/eas.py
new file mode 100644
index 0000000000000000000000000000000000000000..79beaf4bef5cf95e410ecfbc9ba27b035563c6a9
--- /dev/null
+++ b/lisa/analysis/eas.py
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# Copyright (C) 2020, ARM Limited and contributors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pandas as pd
+import numpy as np
+
+from lisa.analysis.base import TraceAnalysisBase
+from lisa.trace import requires_events
+from lisa.datautils import df_delta, df_filter_task_ids
+
+class EASAnalysis(TraceAnalysisBase):
+    """
+    Energy Aware Scheduler (EAS) specific analysis.
+
+    :param trace: input Trace object
+    :type trace: lisa.trace.Trace
+    """
+    name = 'eas'
+
+###############################################################################
+# DataFrame Getter Methods
+###############################################################################
+
+    @TraceAnalysisBase.cache
+    @requires_events('sched_pre_feec', 'sched_post_feec')
+    def df_feec_delta(self, tasks=None):
+        """
+        DataFrame containing ``find_energy_efficient_cpu`` (feec) related
+        information.
+
+        :param tasks: Task names or PIDs or ``(pid, comm)`` to look for.
+        :type tasks: list(int or str or tuple(int, str))
+
+        :returns: a :class:`pandas.DataFrame` indexed by ``Time`` with:
+
+          - A ``pid`` column.
+          - A ``comm`` column.
+          - A ``__cpu`` column (the CPU feec was executed on).
+          - A ``prev_cpu`` column (the CPU the task was running on).
+          - A ``dst_cpu`` column (the CPU selected by feec).
+          - A ``delta`` column (duration of the feec function call).
+        """
+        pre_df = self.trace.df_event('sched_pre_feec')
+        post_df = self.trace.df_event('sched_post_feec')
+
+        # Filter the tasks.
+        if tasks:
+            pre_df = df_filter_task_ids(pre_df, tasks)
+            post_df = df_filter_task_ids(post_df, tasks)
+
+        pre_df = pre_df.drop(columns=['__comm', '__pid'])
+        post_df = post_df.drop(columns=['__comm', '__pid'])
+
+        # Also group by '__cpu': the events must be emitted from the same CPU.
+        return df_delta(pre_df, post_df, ['pid', 'comm', '__cpu'])
+
+
+# vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80
diff --git a/lisa/datautils.py b/lisa/datautils.py
index c87123ca5f6d6df156b6a2bec957a55aeac2f539..842a9c33b129c6ab0d52cb0c6c35ab0304cf8808 100644
--- a/lisa/datautils.py
+++ b/lisa/datautils.py
@@ -416,6 +416,90 @@ def df_merge(df_list, drop_columns=None, drop_inplace=False, filter_columns=None
     return functools.reduce(merge, df_list)
 
 
+def df_delta(pre_df, post_df, group_on):
+    """
+    Merge ``pre_df`` and ``post_df``, containing paired/consecutive events
+    indexed by time, and add a ``delta`` column holding the time spent
+    between the two events.
+    A typical usecase would be adding pre/post events at the entry/exit of a
+    function.
+
+    Rows from ``pre_df`` and ``post_df`` are grouped by the ``group_on``
+    columns.
+    E.g.: ``['pid', 'comm']`` to group by task.
+    Apart from the columns listed in ``group_on``, ``pre_df`` and ``post_df``
+    must have columns with different names.
+
+    Events that cannot be paired are ignored.
+
+    :param pre_df: Dataframe containing the events that start a record.
+    :type pre_df: pandas.DataFrame
+
+    :param post_df: Dataframe containing the events that end a record.
+    :type post_df: pandas.DataFrame
+
+    :param group_on: Columns used to group ``pre_df`` and ``post_df``.
+        E.g.: This would be ``['pid', 'comm']`` to group by task.
+    :type group_on: list(str)
+
+    :returns: a :class:`pandas.DataFrame` indexed by the ``pre_df`` index,
+        with:
+
+        * All the columns from the ``pre_df`` dataframe.
+        * All the columns from the ``post_df`` dataframe.
+        * A ``delta`` column (duration between the emission of a 'pre' event
+          and its consecutive 'post' event).
+    """
+    pre_df = pre_df.copy(deep=False)
+    post_df = post_df.copy(deep=False)
+
+    # Tag the rows to remember which df they come from.
+    pre_df["is_pre"] = True
+    post_df["is_pre"] = False
+
+    # Merge on the columns common to the two dfs to avoid overlapping names.
+    on_col = list(sorted(pre_df.columns & post_df.columns))
+
+    # Merging on nullable types converts columns to object.
+    # Merging on non-nullable types converts integer/boolean to float.
+    # Thus, leave the on_col columns non-nullable and convert the others.
+    pre_df_cols = list(set(pre_df) - set(on_col))
+    post_df_cols = list(set(post_df) - set(on_col))
+    pre_df[pre_df_cols] = df_convert_to_nullable(pre_df[pre_df_cols])
+    post_df[post_df_cols] = df_convert_to_nullable(post_df[post_df_cols])
+
+    # Merge. Don't allow column renaming.
+    df = pd.merge(pre_df, post_df, left_index=True, right_index=True, on=on_col,
+                  how='outer', suffixes=(False, False))
+    df.index.name = 'Time'
+    df.reset_index(inplace=True)
+
+    # In each group, check for a faulty sequence (where pre/post events do
+    # not interleave, e.g. pre1->pre2->post1->post2).
+    grouped = df.groupby(group_on, observed=True, sort=False)
+    if grouped['is_pre'].transform(lambda x: x == x.shift()).any():
+        raise ValueError('Unexpected sequence of pre and post events (more than one "pre" or "post" in a row)')
+
+    # Create the 'delta' column and add the columns from post_df
+    # to the rows coming from pre_df.
+    new_columns = dict(
+        delta=grouped['Time'].transform(lambda time: time.diff().shift(-1)),
+    )
+    new_columns.update({col: grouped[col].shift(-1) for col in post_df_cols})
+    df = df.assign(**new_columns)
+
+    df.set_index('Time', inplace=True)
+
+    # Only keep the rows from pre_df; they have all the necessary info.
+    df = df.loc[df["is_pre"]]
+    # Drop the rows from pre_df with no matching row in post_df.
+    df.dropna(inplace=True)
+
+    df.drop(columns=["is_pre"], inplace=True)
+
+    return df
+
+
 def _resolve_x(y, x):
     """
     Resolve the `x` series to use for derivative and integral operations
@@ -1632,7 +1716,7 @@ class SignalDesc:
 
 
 @SeriesAccessor.register_accessor
-def series_convert(series, dtype):
+def series_convert(series, dtype, nullable=None):
     """
     Convert a :class:`pandas.Series` with a best effort strategy.
 
@@ -1662,9 +1746,35 @@ def series_convert(series, dtype):
         negative values, as there is no way to reliably distinguish between
         conversion failures reasons.
     :type dtype: str or collections.abc.Callable
+
+    :param nullable: If:
+
+        - ``True``, use the nullable dtype equivalent of the requested dtype.
+        - ``False``, use the requested dtype as-is (no nullable conversion).
+        - ``None``, use the equivalent nullable dtype if there is any missing
+          data, otherwise a non-nullable dtype for lower memory consumption.
+    :type nullable: bool or None
     """
 
-    if series.dtype.name == dtype:
+    nullable_dtypes = {
+        'int': 'Int64',
+        'int8': 'Int8',
+        'int16': 'Int16',
+        'int32': 'Int32',
+        'int64': 'Int64',
+
+        'uint': 'UInt64',
+        'uint8': 'UInt8',
+        'uint16': 'UInt16',
+        'uint32': 'UInt32',
+        'uint64': 'UInt64',
+
+        'bool': 'boolean',
+    }
+
+    if series.dtype.name == dtype and \
+            not (nullable and dtype in nullable_dtypes):
+        # Don't skip if a conversion to a nullable dtype was requested.
         return series
 
     def to_object(x):
@@ -1675,7 +1785,8 @@ def series_convert(series, dtype):
         return x
 
     astype = lambda dtype: lambda x: x.astype(dtype, copy=False)
-    make_convert = lambda dtype: lambda x: series_convert(x, dtype)
+    make_convert = lambda dtype: lambda x: series_convert(x, dtype,
+                                                          nullable=nullable)
     basic = astype(dtype)
 
     class Tree(list):
@@ -1751,36 +1862,32 @@ def series_convert(series, dtype):
     # Then try with a nullable type.
     # Floats are already nullable so we don't need to do anything
     elif is_bool or is_int:
-        nullable_dtypes = {
-            'int': 'Int64',
-            'int8': 'Int8',
-            'int16': 'Int16',
-            'int32': 'Int32',
-            'int64': 'Int64',
-
-            'uint': 'UInt64',
-            'uint8': 'UInt8',
-            'uint16': 'UInt16',
-            'uint32': 'UInt32',
-            'uint64': 'UInt64',
-
-            'bool': 'boolean',
-        }
 
         # Bare nullable dtype
         # Already nullable
         if dtype[0].isupper():
-            nullable = dtype
+            nullable_type = dtype
         else:
-            nullable = nullable_dtypes[dtype]
-        to_nullable = astype(nullable)
+            nullable_type = nullable_dtypes[dtype]
+        to_nullable = astype(nullable_type)
 
-        # Strategy assuming it's already a numeric type
-        from_numeric = Alternative(
-            basic,
-            to_nullable
-        )
+        if nullable:
+            # Only allow the nullable dtype conversion.
+            from_numeric = Alternative(
+                to_nullable
+            )
+        elif nullable is None:
+            # nullable is None: default behaviour, try both.
+            from_numeric = Alternative(
+                basic,
+                to_nullable
+            )
+        else:
+            # Do not convert to a nullable dtype unless the user specified one.
+            from_numeric = Alternative(
+                basic
+            )
 
         if is_int:
             parse = Alternative(
@@ -1902,6 +2009,23 @@ def series_convert(series, dtype):
     return pipelines(series)
 
+
+@DataFrameAccessor.register_accessor
+def df_convert_to_nullable(df):
+    """
+    Convert the columns of the dataframe to their equivalent nullable dtype,
+    when possible.
+
+    :param df: The dataframe to convert.
+    :type df: pandas.DataFrame
+
+    :returns: The dataframe with converted columns.
+ """ + def _series_convert(column): + return series_convert(column, str(column.dtype), nullable=True) + + return df.apply(_series_convert, raw=False) + + # Defined outside SignalDesc as it references SignalDesc itself _SIGNALS = [ SignalDesc('sched_switch', ['next_comm', 'next_pid']), @@ -1919,6 +2043,8 @@ _SIGNALS = [ SignalDesc('cpu_frequency', ['cpu_id']), SignalDesc('userspace@cpu_frequency_devlib', ['cpu_id']), SignalDesc('sched_compute_energy', ['comm', 'pid']), + SignalDesc('sched_pre_feec', ['comm', 'pid', 'prev_cpu']), + SignalDesc('sched_post_feec', ['comm', 'pid', 'dst_cpu']), SignalDesc('sched_pelt_se', ['comm', 'pid']), SignalDesc('sched_load_se', ['comm', 'pid']), diff --git a/lisa/trace.py b/lisa/trace.py index af41b0126ef2e2f8da6b57d9a443962a2d23d580..f581d85eeb4a547b1dad11feef77b143906e5e6d 100644 --- a/lisa/trace.py +++ b/lisa/trace.py @@ -1247,6 +1247,20 @@ class TxtTraceParser(TxtTraceParserBase): 'prev_cpu': _KERNEL_DTYPE['cpu'], }, ), + 'sched_pre_feec': dict( + fields={ + 'comm': _KERNEL_DTYPE['comm'], + 'prev_cpu': _KERNEL_DTYPE['cpu'], + 'pid': _KERNEL_DTYPE['pid'], + }, + ), + 'sched_post_feec': dict( + fields={ + 'comm': _KERNEL_DTYPE['comm'], + 'dst_cpu': _KERNEL_DTYPE['cpu'], + 'pid': _KERNEL_DTYPE['pid'], + }, + ), 'sched_pelt_cfs': dict( fields={ 'cpu': _KERNEL_DTYPE['cpu'],