From 4fe4d9c0048e341bbe14ad8a3a94865c06e694f6 Mon Sep 17 00:00:00 2001
From: Ryan Roberts
Date: Mon, 7 Jul 2025 14:49:31 +0100
Subject: [PATCH] cli: Refactor to create join_results() and filter_results()

Introduce new utility functions `join_results()` and `filter_results()`
in fastpath/utils/table.py to encapsulate the table-joining and
filtering logic that was previously open-coded in the result show
command and the dashboard. The former takes the dictionary of
resultstore tables and returns a single results table with much of the
common metadata joined in. The latter filters the results table by
sut/swprofile/benchmark/resultclass.

Update all previous call sites to use these new utilities, reducing
redundancy and improving maintainability.

As a beneficial side effect, we clean up the display of the resultclass
in the dashboard by removing the units. Units are still displayed on
the Y-axis of the graph.

Co-developed-by: Aishwarya TCV
Signed-off-by: Aishwarya TCV
Co-developed-by: Ryan Roberts
Signed-off-by: Ryan Roberts
---
 fastpath/commands/verbs/result/show.py | 48 ++++++++------------------
 fastpath/dashboard/dashboard.py        | 43 ++++++-----------------
 fastpath/utils/table.py                | 43 +++++++++++++++++++++++
 3 files changed, 68 insertions(+), 66 deletions(-)

diff --git a/fastpath/commands/verbs/result/show.py b/fastpath/commands/verbs/result/show.py
index e34189e..25df47d 100644
--- a/fastpath/commands/verbs/result/show.py
+++ b/fastpath/commands/verbs/result/show.py
@@ -10,7 +10,7 @@ import scipy as sp
 import tabulate
 from fastpath.commands import cliutils
 from fastpath.utils import term
-from fastpath.utils.table import Table, load_tables
+from fastpath.utils.table import load_tables, join_results, filter_results
 
 tabulate.PRESERVE_WHITESPACE = True
 
@@ -152,36 +152,10 @@ def dispatch(args):
     arguments comply with those requested in add_parser().
     """
     tables = load_tables(args.resultstore, not args.no_merge_similar)
-    results = tables[Table.RESULT]
-
-    # Join all the info we need from all the tables into a single uber-table.
-    results = results.join(
-        tables[Table.RESULTCLASS][
-            ["benchmark_id", "name", "unit", "improvement"]
-        ],
-        on="resultclass_id",
-    )
-    results = results.join(
-        tables[Table.SUT]["unique"].rename("sut"), on="sut_id"
-    )
-    results = results.join(
-        tables[Table.SWPROFILE]["unique"].rename("swprofile"), on="swprofile_id"
-    )
-    results = results.join(
-        tables[Table.BENCHMARK]["unique"].rename("benchmark"), on="benchmark_id"
-    )
-    results["resultclass"] = results.apply(
-        lambda row: f"{row['name']} ({row['unit']})", axis=1
-    )
-    results = results.drop(["name", "unit"], axis=1)
+    results = join_results(tables)
 
     # Filter results by the sut/swprofile/benchmark IDs provided on cli.
- if args.sut: - results = results[results["sut"].isin(args.sut)] - if args.swprofile: - results = results[results["swprofile"].isin(args.swprofile)] - if args.benchmark: - results = results[results["benchmark"].isin(args.benchmark)] + results = filter_results(results, args.sut, args.swprofile, args.benchmark) if len(results) == 0: raise Exception("No results to display after filtering") @@ -239,6 +213,10 @@ def dispatch(args): term.page_out(title.getvalue() + table) +pivot_index = ["benchmark", "resultclass", "unit", "improvement"] +leading_cols = len(pivot_index) + + def pivot_results(df, suts, swprofiles): def my_agg(group): values = group["value"] @@ -269,14 +247,14 @@ def pivot_results(df, suts, swprofiles): ) df = ( df.groupby( - ["benchmark", "resultclass", "improvement", "sut", "swprofile"], + [*pivot_index, "sut", "swprofile"], observed=True, ) .apply(my_agg) .reset_index() ) df = df.pivot( - index=["benchmark", "resultclass", "improvement"], + index=pivot_index, columns=["sut", "swprofile"], values=["min", "ci95min", "mean", "ci95max", "max", "stddev", "count"], ) @@ -309,7 +287,6 @@ def compute_change(results, args): cilow = results.xs(key="ci95min", level="summary", axis=1).reset_index() cihigh = results.xs(key="ci95max", level="summary", axis=1).reset_index() - leading_cols = 3 rows, cols = mean.shape sets = results.columns.get_level_values(1).unique() @@ -376,7 +353,6 @@ def pretty_results_multi(results, args): mean = results.xs(key="mean", level="summary", axis=1).reset_index() change = results.xs(key="change", level="summary", axis=1).reset_index() - leading_cols = 3 rows, cols = mean.shape pretty = mean.copy() @@ -458,6 +434,12 @@ def pretty_results(results, args): # Slim down resultclass name when there is overlap with the benchmark name. results["resultclass"] = results.apply(resultclass_slim, axis=1) + # Include the units with the resultclass name for display purposes. + results["resultclass"] = results.apply( + lambda row: f"{row['resultclass']} ({row['unit']})", axis=1 + ) + results = results.drop(["unit"], axis=1) + # Replace nans with empty space. 
results = results.replace( { diff --git a/fastpath/dashboard/dashboard.py b/fastpath/dashboard/dashboard.py index e089e56..4edcd4b 100644 --- a/fastpath/dashboard/dashboard.py +++ b/fastpath/dashboard/dashboard.py @@ -9,6 +9,7 @@ import streamlit as st import natsort as ns from fastpath.utils import table +from fastpath.utils.table import join_results from fastpath.commands.verbs.result import list as listobjs from fastpath.commands.verbs.result import show from fastpath.commands.verbs.result.show import compute_change @@ -24,32 +25,6 @@ def load_tables(resultstore_path: str): return table.load_tables(resultstore_path) -def build_joined_results(tables: dict) -> pd.DataFrame: - results = ( - tables[table.Table.RESULT] - .join( - tables[table.Table.RESULTCLASS][ - ["benchmark_id", "name", "unit", "improvement"] - ], - on="resultclass_id", - ) - .join(tables[table.Table.SUT]["unique"].rename("sut"), on="sut_id") - .join( - tables[table.Table.SWPROFILE]["unique"].rename("swprofile"), - on="swprofile_id", - ) - .join( - tables[table.Table.BENCHMARK]["unique"].rename("benchmark"), - on="benchmark_id", - ) - ) - results["resultclass"] = results.apply( - lambda row: f"{row['name']} ({row['unit']})", axis=1 - ) - results.drop(["name"], axis=1, inplace=True) - return results - - def build_metadata_table(tables: dict, object_type: str) -> pd.DataFrame: args = argparse.Namespace(all=True, ascii=True, id=None) fn = { @@ -235,7 +210,7 @@ def render_deviation_comparison_chart( ] # Melt change data melted = change_df.melt( - id_vars=["benchmark", "resultclass", "improvement"], + id_vars=["benchmark", "resultclass", "unit", "improvement"], var_name="comparison", value_name="category", ) @@ -248,7 +223,7 @@ def render_deviation_comparison_chart( ) # Melt mean_df for clean mean lookup mean_melted = mean_df.melt( - id_vars=["benchmark", "resultclass", "improvement"], + id_vars=["benchmark", "resultclass", "unit", "improvement"], var_name="comparison", value_name="mean", ) @@ -256,7 +231,7 @@ def render_deviation_comparison_chart( merged = pd.merge( melted, mean_melted, - on=["benchmark", "resultclass", "improvement", "comparison"], + on=["benchmark", "resultclass", "unit", "improvement", "comparison"], ) # Find baseline mean for each group and merge baseline_df = mean_melted[ @@ -264,10 +239,12 @@ def render_deviation_comparison_chart( ] baseline_df = baseline_df.rename(columns={"mean": "baseline_mean"}) baseline_df = baseline_df[ - ["benchmark", "resultclass", "improvement", "baseline_mean"] + ["benchmark", "resultclass", "unit", "improvement", "baseline_mean"] ] merged = pd.merge( - merged, baseline_df, on=["benchmark", "resultclass", "improvement"] + merged, + baseline_df, + on=["benchmark", "resultclass", "unit", "improvement"], ) # Compute deviation if show_relative: @@ -333,9 +310,9 @@ def main(): st.sidebar.header("Filter Results") tables = load_tables(sys.argv[1]) - results_df = build_joined_results(tables) + results = join_results(tables) - filtered = apply_filters(results_df) + filtered = apply_filters(results) if not filtered: return diff --git a/fastpath/utils/table.py b/fastpath/utils/table.py index 793bd22..42681ed 100644 --- a/fastpath/utils/table.py +++ b/fastpath/utils/table.py @@ -94,3 +94,46 @@ def load_tables(resultstore, merge_similar=True): add_unique_col(dfs[Table.SUT], "name") return dfs + + +def join_results(tables): + """ + Convenience function to join the RESULT table with related metadata from + RESULTCLASS, SUT, SWPROFILE, and BENCHMARK tables, resulting in a unified + pandas 
DataFrame. + """ + results = tables[Table.RESULT].join( + tables[Table.RESULTCLASS][ + ["benchmark_id", "name", "unit", "improvement"] + ].rename(columns={"name": "resultclass"}), + on="resultclass_id", + ) + results = results.join( + tables[Table.SUT]["unique"].rename("sut"), on="sut_id" + ) + results = results.join( + tables[Table.SWPROFILE]["unique"].rename("swprofile"), on="swprofile_id" + ) + results = results.join( + tables[Table.BENCHMARK]["unique"].rename("benchmark"), on="benchmark_id" + ) + return results + + +def filter_results( + df, suts=None, swprofiles=None, benchmarks=None, resultclasses=None +): + """ + Convenience function to filter a results DataFrame by SUT, SWPROFILE, BENCHMARK, + and RESULTCLASS. Each filter is optional; if provided, the DataFrame will be + filtered to rows matching any values in the corresponding list. + """ + if suts: + df = df[df["sut"].isin(suts)] + if swprofiles: + df = df[df["swprofile"].isin(swprofiles)] + if benchmarks: + df = df[df["benchmark"].isin(benchmarks)] + if resultclasses: + df = df[df["resultclass"].isin(resultclasses)] + return df -- GitLab
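
A minimal usage sketch of the new helpers, mirroring the dispatch() flow
in show.py above (the resultstore path and the filter values here are
hypothetical, purely for illustration):

    from fastpath.utils.table import load_tables, join_results, filter_results

    tables = load_tables("path/to/resultstore")

    # One row per result, with resultclass/unit/improvement plus the unique
    # sut, swprofile and benchmark names joined in from the metadata tables.
    results = join_results(tables)

    # Each filter is optional; a filter left as None (or an empty list)
    # leaves that axis unfiltered. "sut-a" and "bench-x" are hypothetical
    # names, not values from any real resultstore.
    filtered = filter_results(
        results,
        suts=["sut-a"],
        benchmarks=["bench-x"],
    )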