From ddcda63e05c6082ac7adb21876a4f9571be6e78c Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 22 Jan 2018 14:25:37 +0000 Subject: [PATCH 1/3] wltests: log directory being parsed If we log the directory we're currently parsing, it helps to reassure that things are not frozen and also lets you link back from missing results to tests. Signed-off-by: Chris Redpath --- libs/utils/wa_results_collector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py index 075cea671..1124274a9 100644 --- a/libs/utils/wa_results_collector.py +++ b/libs/utils/wa_results_collector.py @@ -142,6 +142,7 @@ class WaResultsCollector(object): df = pd.DataFrame() for wa_dir in wa_dirs: + self._log.info("Reading wa_dir %s", wa_dir) df = df.append(self._read_wa_dir(wa_dir)) kernel_refs = {} -- GitLab From f0d1fa2449cba25936b2fb2bc6f2c259c60641b8 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 22 Jan 2018 14:26:48 +0000 Subject: [PATCH 2/3] wltests/wa_collector: Make some methods memoized This appears to improve speed quite a lot when generating reports, but likely costs a bit of RAM. 
Signed-off-by: Chris Redpath --- libs/utils/wa_results_collector.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py index 1124274a9..6ee68e254 100644 --- a/libs/utils/wa_results_collector.py +++ b/libs/utils/wa_results_collector.py @@ -33,6 +33,7 @@ from conf import LisaLogging from bart.common.Utils import area_under_curve from devlib.target import KernelVersion +from devlib.utils.misc import memoized from trappy.utils import handle_duplicate_index from IPython.display import display @@ -487,6 +488,7 @@ class WaResultsCollector(object): return metrics_df + @memoized def _wa_get_kernel_sha1(self, wa_dir): """ Find the SHA1 of the kernel that a WA3 run was run against @@ -495,6 +497,7 @@ target_info = json.load(f) return KernelVersion(target_info['kernel_release']).sha1 + @memoized def _select(self, tag='.*', kernel='.*', test='.*'): _df = self.results_df _df = _df[_df.tag.str.contains(tag)] @@ -514,6 +517,7 @@ def tags(self): return self.results_df['tag'].unique() + @memoized def tests(self, workload=None): df = self.results_df if workload: @@ -525,10 +529,12 @@ .groupby('workload').get_group(workload) ['metric'].unique()) + @memoized def _get_metric_df(self, workload, metric, tag, kernel, test): """ Common helper for getting results to plot for a given metric """ + df = self._select(tag, kernel, test) if df.empty: self._log.warn("No data to plot for (tag: %s, kernel: %s, test: %s)", -- GitLab From 856efdf9f52ceeef9846112f17266c904fe94891 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 22 Jan 2018 14:28:24 +0000 Subject: [PATCH 3/3] wltests/wa_collector: Replace multiple DF append Whenever possible, instead of repeatedly appending each results dataframe as we generate it, we can store them in a list and add everything in one go. 
Signed-off-by: Chris Redpath --- libs/utils/wa_results_collector.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py index 6ee68e254..77474b7b6 100644 --- a/libs/utils/wa_results_collector.py +++ b/libs/utils/wa_results_collector.py @@ -142,9 +142,11 @@ class WaResultsCollector(object): self.use_cached_trace_metrics = use_cached_trace_metrics df = pd.DataFrame() + df_list = [] for wa_dir in wa_dirs: self._log.info("Reading wa_dir %s", wa_dir) - df = df.append(self._read_wa_dir(wa_dir)) + df_list.append(self._read_wa_dir(wa_dir)) + df = df.append(df_list) kernel_refs = {} if kernel_repo_path: @@ -232,6 +234,7 @@ class WaResultsCollector(object): tag_map = {} test_map = {} job_dir_map = {} + extra_dfs = [] for job in jobs: workload = job['workload_name'] @@ -300,8 +303,12 @@ class WaResultsCollector(object): extra_df.loc[:, 'id'] = job_id extra_df.loc[:, 'tag'] = tag extra_df.loc[:, 'test'] = test + # Collect all these DFs to merge them in one go at the end. 
+ extra_dfs.append(extra_df) - df = df.append(extra_df) + # Append all extra DFs to WA's results DF + if extra_dfs: + df = df.append(extra_dfs) for iteration, job_ids in skipped_jobs.iteritems(): self._log.warning("Skipped failed iteration %d for jobs:", iteration) @@ -426,11 +433,11 @@ """ # return # value,metric,units - metrics_df = pd.DataFrame() + extra_metric_list = [] artifacts = self._read_artifacts(job_dir) if self.parse_traces and 'trace-cmd-bin' in artifacts: - metrics_df = metrics_df.append( + extra_metric_list.append( self._get_trace_metrics(artifacts['trace-cmd-bin'])) if 'jankbench_results_csv' in artifacts: @@ -439,7 +446,7 @@ df.loc[:, 'metric'] = 'frame_total_duration' df.loc[:, 'units'] = 'ms' - metrics_df = metrics_df.append(df) + extra_metric_list.append(df) # WA's metrics model just exports overall energy metrics, not individual # samples. We're going to extend that with individual samples so if you @@ -473,7 +480,7 @@ df.loc[:, 'units'] = 'watts' - metrics_df = metrics_df.append(df) + extra_metric_list.append(df) elif 'output_power' in df.columns and 'USB_power' in df.columns: # Looks like this is from a Monsoon # For monsoon the USB and device power are collected @@ -484,9 +491,11 @@ df.loc[:, 'metric'] = 'device_power_sample' df.loc[:, 'units'] = 'watts' - metrics_df = metrics_df.append(df) - - return metrics_df + extra_metric_list.append(df) + if len(extra_metric_list) > 0: + return pd.DataFrame().append(extra_metric_list) + else: + return pd.DataFrame() @memoized def _wa_get_kernel_sha1(self, wa_dir): -- GitLab