From 650efd4dbf77ab104a59d009791d95fdf02bab90 Mon Sep 17 00:00:00 2001
From: Pierre Gondois
Date: Mon, 4 Jan 2021 14:43:39 +0000
Subject: [PATCH 1/6] lisa.datautils: add nullable parameter to series_convert()

series_convert() converts a pandas.Series to a given datatype. Its
current behaviour is to convert the series to the nullable version of
the datatype only if the conversion to the non-nullable version failed.

This patch adds a new "nullable" parameter to force or prevent a
conversion to a nullable datatype. Leaving this parameter unset keeps
the current behaviour.

Reference:
https://pandas.pydata.org/pandas-docs/version/1.0.0/user_guide/integer_na.html

Signed-off-by: Pierre Gondois
---
 lisa/datautils.py | 75 +++++++++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 26 deletions(-)

diff --git a/lisa/datautils.py b/lisa/datautils.py
index c87123ca5..a9c1e1967 100644
--- a/lisa/datautils.py
+++ b/lisa/datautils.py
@@ -1632,7 +1632,7 @@ class SignalDesc:
 
 
 @SeriesAccessor.register_accessor
-def series_convert(series, dtype):
+def series_convert(series, dtype, nullable=None):
     """
     Convert a :class:`pandas.Series` with a best effort strategy.
 
@@ -1662,9 +1662,35 @@ def series_convert(series, dtype):
             negative values, as there is no way to reliably distinguish between
             conversion failures reasons.
     :type dtype: str or collections.abc.Callable
+
+    :param nullable: If:
+
+        - ``True``, use the nullable dtype equivalent of the requested dtype.
+        - ``False``, do not fall back to a nullable dtype.
+        - ``None``, use the equivalent nullable dtype if there is missing data,
+          otherwise a non-nullable dtype for lower memory consumption.
+    :type nullable: bool or None
     """
-    if series.dtype.name == dtype:
+    nullable_dtypes = {
+        'int': 'Int64',
+        'int8': 'Int8',
+        'int16': 'Int16',
+        'int32': 'Int32',
+        'int64': 'Int64',
+
+        'uint': 'UInt64',
+        'uint8': 'UInt8',
+        'uint16': 'UInt16',
+        'uint32': 'UInt32',
+        'uint64': 'UInt64',
+
+        'bool': 'boolean',
+    }
+
+    if series.dtype.name == dtype and \
+            not (nullable and dtype in nullable_dtypes):
+        # If a conversion to a nullable dtype is needed, don't skip it.
         return series
 
     def to_object(x):
@@ -1675,7 +1701,8 @@ def series_convert(series, dtype):
         return x
 
     astype = lambda dtype: lambda x: x.astype(dtype, copy=False)
-    make_convert = lambda dtype: lambda x: series_convert(x, dtype)
+    make_convert = lambda dtype: lambda x: series_convert(x, dtype,
+                                                          nullable=nullable)
     basic = astype(dtype)
 
     class Tree(list):
@@ -1751,36 +1778,32 @@ def series_convert(series, dtype):
     # Then try with a nullable type.
     # Floats are already nullable so we don't need to do anything
     elif is_bool or is_int:
-        nullable_dtypes = {
-            'int': 'Int64',
-            'int8': 'Int8',
-            'int16': 'Int16',
-            'int32': 'Int32',
-            'int64': 'Int64',
-
-            'uint': 'UInt64',
-            'uint8': 'UInt8',
-            'uint16': 'UInt16',
-            'uint32': 'UInt32',
-            'uint64': 'UInt64',
-
-            'bool': 'boolean',
-        }
-
         # Bare nullable dtype
         # Already nullable
         if dtype[0].isupper():
-            nullable = dtype
+            nullable_type = dtype
         else:
-            nullable = nullable_dtypes[dtype]
-        to_nullable = astype(nullable)
+            nullable_type = nullable_dtypes[dtype]
+        to_nullable = astype(nullable_type)
 
-        # Strategy assuming it's already a numeric type
-        from_numeric = Alternative(
-            basic,
-            to_nullable
-        )
+        if nullable:
+            # Only allow conversion to the nullable dtype.
+            from_numeric = Alternative(
+                to_nullable
+            )
+        elif nullable is None:
+            # Default behaviour: try the requested dtype, then the nullable one.
+            from_numeric = Alternative(
+                basic,
+                to_nullable
+            )
+        else:
+            # Only use a nullable dtype if the requested dtype is itself nullable.
+            from_numeric = Alternative(
+                basic
+            )
 
     if is_int:
         parse = Alternative(
-- 
GitLab

From e8253ab7de5ec076c19c11e4663b86759f6c6763 Mon Sep 17 00:00:00 2001
From: Pierre Gondois
Date: Mon, 4 Jan 2021 14:44:27 +0000
Subject: [PATCH 2/6] lisa.datautils: add df_convert_to_nullable()

Add df_convert_to_nullable(), which converts the columns of a dataframe
to their equivalent nullable datatype.

Reference:
https://pandas.pydata.org/pandas-docs/version/1.0.0/user_guide/integer_na.html

Signed-off-by: Pierre Gondois
---
 lisa/datautils.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/lisa/datautils.py b/lisa/datautils.py
index a9c1e1967..22676c2c1 100644
--- a/lisa/datautils.py
+++ b/lisa/datautils.py
@@ -1925,6 +1925,23 @@ def series_convert(series, dtype, nullable=None):
     return pipelines(series)
 
 
+@DataFrameAccessor.register_accessor
+def df_convert_to_nullable(df):
+    """
+    Convert the columns of the dataframe to their equivalent nullable dtype,
+    when possible.
+
+    :param df: The dataframe to convert.
+    :type df: pandas.DataFrame
+
+    :returns: The dataframe with converted columns.
+    """
+    def _series_convert(column):
+        return series_convert(column, str(column.dtype), nullable=True)
+
+    return df.apply(_series_convert, raw=False)
+
+
 # Defined outside SignalDesc as it references SignalDesc itself
 _SIGNALS = [
     SignalDesc('sched_switch', ['next_comm', 'next_pid']),
-- 
GitLab

From 58ef2da2b07272acde845aa63bc03e9c61fc00b3 Mon Sep 17 00:00:00 2001
From: Pierre Gondois
Date: Tue, 15 Dec 2020 16:04:34 +0000
Subject: [PATCH 3/6] lisa.datautils: Add df_delta() function

The df_delta() function merges two dataframes of events that are
intended to be consecutive, e.g. a 'pre' event is emitted at the entry
of a function and a 'post' event is emitted at the exit.

The function keeps all the columns of the 'pre' and 'post' dataframes.
It adds an additional 'delta' column containing the duration between
the emission of a 'pre' event and its consecutive 'post' event.

Signed-off-by: Pierre Gondois
---
 lisa/datautils.py | 84 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/lisa/datautils.py b/lisa/datautils.py
index 22676c2c1..91342e703 100644
--- a/lisa/datautils.py
+++ b/lisa/datautils.py
@@ -416,6 +416,90 @@ def df_merge(df_list, drop_columns=None, drop_inplace=False, filter_columns=None
     return functools.reduce(merge, df_list)
 
 
+def df_delta(pre_df, post_df, group_on):
+    """
+    Given ``pre_df`` and ``post_df`` containing paired/consecutive events
+    indexed by time, merge the two dataframes and add a ``delta`` column
+    containing the time spent between the two events.
+    A typical use case is a pair of events emitted at the entry and exit of
+    a function.
+
+    Rows from ``pre_df`` and ``post_df`` are grouped by the ``group_on``
+    columns, e.g. ``['pid', 'comm']`` to group by task.
+    Apart from the columns listed in ``group_on``, ``pre_df`` and ``post_df``
+    must have columns with different names.
+
+    Events that cannot be paired are ignored.
+
+    :param pre_df: Dataframe containing the events that start a record.
+    :type pre_df: pandas.DataFrame
+
+    :param post_df: Dataframe containing the events that end a record.
+    :type post_df: pandas.DataFrame
+
+    :param group_on: Columns used to group ``pre_df`` and ``post_df``,
+        e.g. ``['pid', 'comm']`` to group by task.
+    :type group_on: list(str)
+
+    :returns: a :class:`pandas.DataFrame` indexed by the timestamps of the
+        ``pre_df`` events, with:
+
+        * All the columns from the ``pre_df`` dataframe.
+        * All the columns from the ``post_df`` dataframe.
+        * A ``delta`` column (duration between the emission of a 'pre' event
+          and its consecutive 'post' event).
+    """
+    pre_df = pre_df.copy(deep=False)
+    post_df = post_df.copy(deep=False)
+
+    # Tag the rows to remember which df they are coming from.
+    pre_df["is_pre"] = True
+    post_df["is_pre"] = False
+
+    # Merge on the columns common to the two dfs to avoid overlapping names.
+    on_col = list(sorted(pre_df.columns & post_df.columns))
+
+    # Merging on nullable types converts columns to object.
+    # Merging on non-nullable types converts integer/boolean to float.
+    # Thus, keep on_col non-nullable and convert the other columns to nullable.
+    pre_df_cols = list(set(pre_df) - set(on_col))
+    post_df_cols = list(set(post_df) - set(on_col))
+    pre_df[pre_df_cols] = df_convert_to_nullable(pre_df[pre_df_cols])
+    post_df[post_df_cols] = df_convert_to_nullable(post_df[post_df_cols])
+
+    # Merge. Don't allow column renaming.
+    df = pd.merge(pre_df, post_df, left_index=True, right_index=True, on=on_col,
+                  how='outer', suffixes=(False, False))
+    df.index.name = 'Time'
+    df.reset_index(inplace=True)
+
+    # In each group, search for a faulty sequence (where pre/post events are
+    # not interleaving, e.g. pre1->pre2->post1->post2).
+    grouped = df.groupby(group_on, observed=True, sort=False)
+    if grouped['is_pre'].transform(lambda x: x == x.shift()).any():
+        raise ValueError('Unexpected sequence of pre and post events (more than one "pre" or "post" in a row)')
+
+    # Create the 'delta' column and add the columns from post_df
+    # to the rows coming from pre_df.
+    new_columns = dict(
+        delta=grouped['Time'].transform(lambda time: time.diff().shift(-1)),
+    )
+    new_columns.update({col: grouped[col].shift(-1) for col in post_df_cols})
+    df = df.assign(**new_columns)
+
+    df.set_index('Time', inplace=True)
+
+    # Only keep the rows from pre_df, as they now hold all the necessary info.
+    df = df.loc[df["is_pre"]]
+    # Drop the rows from pre_df with no matching row from post_df.
+    df.dropna(inplace=True)
+
+    df.drop(columns=["is_pre"], inplace=True)
+
+    return df
+
+
 def _resolve_x(y, x):
     """
     Resolve the `x` series to use for derivative and integral operations
-- 
GitLab

From 495383bae31e7a9996c36a59d40ade0abe89b9f9 Mon Sep 17 00:00:00 2001
From: Pierre Gondois
Date: Mon, 14 Dec 2020 11:46:03 +0000
Subject: [PATCH 4/6] lisa: Add sched_[pre|post]_feec signals

To monitor the "find_energy_efficient_cpu" function, two ftrace events
are added at the entry and exit of the function. This patch adds
support for them in LISA.
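
For example, once a trace containing these events has been collected,
they can be parsed like any other event (a rough sketch, not part of
the patch itself; the trace path and event list are only illustrative):

    from lisa.trace import Trace

    trace = Trace('trace.dat', events=['sched_pre_feec', 'sched_post_feec'])
    pre_df = trace.df_event('sched_pre_feec')    # comm, pid, prev_cpu
    post_df = trace.df_event('sched_post_feec')  # comm, pid, dst_cpu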

Signed-off-by: Pierre Gondois
---
 lisa/datautils.py |  2 ++
 lisa/trace.py     | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/lisa/datautils.py b/lisa/datautils.py
index 91342e703..842a9c33b 100644
--- a/lisa/datautils.py
+++ b/lisa/datautils.py
@@ -2043,6 +2043,8 @@ _SIGNALS = [
     SignalDesc('cpu_frequency', ['cpu_id']),
     SignalDesc('userspace@cpu_frequency_devlib', ['cpu_id']),
     SignalDesc('sched_compute_energy', ['comm', 'pid']),
+    SignalDesc('sched_pre_feec', ['comm', 'pid', 'prev_cpu']),
+    SignalDesc('sched_post_feec', ['comm', 'pid', 'dst_cpu']),
 
     SignalDesc('sched_pelt_se', ['comm', 'pid']),
     SignalDesc('sched_load_se', ['comm', 'pid']),
diff --git a/lisa/trace.py b/lisa/trace.py
index af41b0126..f581d85ee 100644
--- a/lisa/trace.py
+++ b/lisa/trace.py
@@ -1247,6 +1247,20 @@ class TxtTraceParser(TxtTraceParserBase):
                 'prev_cpu': _KERNEL_DTYPE['cpu'],
             },
         ),
+        'sched_pre_feec': dict(
+            fields={
+                'comm': _KERNEL_DTYPE['comm'],
+                'prev_cpu': _KERNEL_DTYPE['cpu'],
+                'pid': _KERNEL_DTYPE['pid'],
+            },
+        ),
+        'sched_post_feec': dict(
+            fields={
+                'comm': _KERNEL_DTYPE['comm'],
+                'dst_cpu': _KERNEL_DTYPE['cpu'],
+                'pid': _KERNEL_DTYPE['pid'],
+            },
+        ),
         'sched_pelt_cfs': dict(
             fields={
                 'cpu': _KERNEL_DTYPE['cpu'],
-- 
GitLab

From efb77086a5a1fa2bb84c99e02b9cfe68fcd593f6 Mon Sep 17 00:00:00 2001
From: Pierre Gondois
Date: Wed, 6 Jan 2021 17:37:13 +0000
Subject: [PATCH 5/6] lisa.analysis: Add eas.py module and df_feec_delta()

df_feec_delta() returns a dataframe containing information related to
the "find_energy_efficient_cpu" (feec) function, such as:
- time the function was called
- time spent in the function
- previous cpu the task was running on
- destination cpu selected by feec

The function requires the following two events:
- sched_pre_feec
- sched_post_feec

This function is placed in the new eas.py module, intended to contain
EAS-specific tools.

Signed-off-by: Pierre Gondois
---
 lisa/analysis/eas.py | 72 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 lisa/analysis/eas.py

diff --git a/lisa/analysis/eas.py b/lisa/analysis/eas.py
new file mode 100644
index 000000000..79beaf4be
--- /dev/null
+++ b/lisa/analysis/eas.py
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# Copyright (C) 2020, ARM Limited and contributors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pandas as pd
+import numpy as np
+
+from lisa.analysis.base import TraceAnalysisBase
+from lisa.trace import requires_events
+from lisa.datautils import df_delta, df_filter_task_ids
+
+class EASAnalysis(TraceAnalysisBase):
+    """
+    Energy Aware Scheduler (EAS) specific analysis.
+
+    :param trace: input Trace object
+    :type trace: lisa.trace.Trace
+    """
+    name = 'eas'
+
+###############################################################################
+# DataFrame Getter Methods
+###############################################################################
+
+    @TraceAnalysisBase.cache
+    @requires_events('sched_pre_feec', 'sched_post_feec')
+    def df_feec_delta(self, tasks=None):
+        """
+        DataFrame containing ``find_energy_efficient_cpu`` (feec) related
+        information.
+
+        :param tasks: Task names or PIDs or ``(pid, comm)`` tuples to look for.
+        :type tasks: list(int or str or tuple(int, str))
+
+        :returns: a :class:`pandas.DataFrame` indexed by ``Time`` with:
+
+            - A ``pid`` column.
+            - A ``comm`` column.
+            - A ``__cpu`` column (the cpu feec was executed on).
+            - A ``prev_cpu`` column (the cpu the task was running on).
+            - A ``dst_cpu`` column (the cpu selected by feec).
+            - A ``delta`` column (duration of the feec function call).
+        """
+        pre_df = self.trace.df_event('sched_pre_feec')
+        post_df = self.trace.df_event('sched_post_feec')
+
+        # Filter the tasks.
+        if tasks:
+            pre_df = df_filter_task_ids(pre_df, tasks)
+            post_df = df_filter_task_ids(post_df, tasks)
+
+        pre_df = pre_df.drop(columns=['__comm', '__pid'])
+        post_df = post_df.drop(columns=['__comm', '__pid'])
+
+        # Also group by '__cpu': the events must be emitted from the same cpu.
+        return df_delta(pre_df, post_df, ['pid', 'comm', '__cpu'])
+
+
+# vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80
-- 
GitLab

From 61f230517e53216425a9d8f52a420e0ba564d801 Mon Sep 17 00:00:00 2001
From: Pierre Gondois
Date: Wed, 6 Jan 2021 18:13:58 +0000
Subject: [PATCH 6/6] lisa.doc: add lisa.analysis.eas section in doc

Add a lisa.analysis.eas section to the documentation.

Signed-off-by: Pierre Gondois
---
 doc/trace_analysis.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/trace_analysis.rst b/doc/trace_analysis.rst
index b9044f7c5..3fe70732b 100644
--- a/doc/trace_analysis.rst
+++ b/doc/trace_analysis.rst
@@ -206,3 +206,9 @@ Trace parsers
 
 .. autoclass:: lisa.trace.TrappyTraceParser
     :members:
+
+EAS
++++
+
+.. automodule:: lisa.analysis.eas
+    :members:
-- 
GitLab
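
Usage sketch for the nullable conversion helpers added in patches 1 and 2.
This is not part of the patches themselves; the series and dataframe
contents below are only illustrative:

    import pandas as pd
    from lisa.datautils import series_convert, df_convert_to_nullable

    s = pd.Series([0, 1, 2])
    # nullable=True requests the nullable 'Int64' dtype even though the data
    # has no missing values, so the series can hold pd.NA later on.
    s = series_convert(s, 'int64', nullable=True)

    df = pd.DataFrame({'pid': [1, 2], 'preempted': [True, False]})
    # Convert every column to its nullable equivalent ('Int64', 'boolean', ...).
    df = df_convert_to_nullable(df)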
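
Usage sketch for df_delta() and df_feec_delta() from patches 3 and 5. This is
not part of the patches; the event values and the trace path below are only
illustrative:

    import pandas as pd
    from lisa.datautils import df_delta

    # Paired events indexed by time. 'pid' and 'comm' are common to both
    # dataframes and are used for grouping; the remaining columns must have
    # different names.
    pre_df = pd.DataFrame(
        {'pid': [1, 2], 'comm': ['task1', 'task2'], 'prev_cpu': [0, 1]},
        index=pd.Index([1.0, 1.1], name='Time'),
    )
    post_df = pd.DataFrame(
        {'pid': [1, 2], 'comm': ['task1', 'task2'], 'dst_cpu': [2, 3]},
        index=pd.Index([1.2, 1.3], name='Time'),
    )
    # One row per 'pre' event, with a 'delta' column giving the time until the
    # matching 'post' event.
    df = df_delta(pre_df, post_df, ['pid', 'comm'])

    # The same pairing applied to the new feec events through the analysis
    # module added in patch 5:
    from lisa.trace import Trace
    from lisa.analysis.eas import EASAnalysis

    trace = Trace('trace.dat', events=['sched_pre_feec', 'sched_post_feec'])
    feec_df = EASAnalysis(trace).df_feec_delta()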