From b2aad497cef55fe8cafbcaf2b34c21d25fa954e9 Mon Sep 17 00:00:00 2001
From: Chris Redpath
Date: Mon, 22 Jan 2018 14:25:37 +0000
Subject: [PATCH 1/8] wltests: log directory being parsed

If we log the directory we're currently parsing, it helps to reassure
that things are not frozen and also lets you link back from missing
results to tests.

Signed-off-by: Chris Redpath
---
 libs/utils/wa_results_collector.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py
index a4a01868d..e3a7abef2 100644
--- a/libs/utils/wa_results_collector.py
+++ b/libs/utils/wa_results_collector.py
@@ -142,6 +142,7 @@ class WaResultsCollector(object):
 
         df = pd.DataFrame()
         for wa_dir in wa_dirs:
+            self._log.info("Reading wa_dir %s", wa_dir)
             df = df.append(self._read_wa_dir(wa_dir))
 
         kernel_refs = {}
--
GitLab

From 9f948565303c5b067ceacf560651eb784ca5d421 Mon Sep 17 00:00:00 2001
From: Chris Redpath
Date: Mon, 22 Jan 2018 14:26:48 +0000
Subject: [PATCH 2/8] wltests/wa_collector: Add memoization for _get_metric_df

This appears to improve speed quite a lot when generating reports, but
likely costs a bit of RAM.

Signed-off-by: Chris Redpath
---
 libs/utils/wa_results_collector.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py
index e3a7abef2..dd2d62b6d 100644
--- a/libs/utils/wa_results_collector.py
+++ b/libs/utils/wa_results_collector.py
@@ -116,6 +116,7 @@ class WaResultsCollector(object):
 
         self._log = logging.getLogger('WaResultsCollector')
 
+        self._metric_df_cache = {}
         if base_dir:
             base_dir = os.path.expanduser(base_dir)
             if not isinstance(wa_dirs, basestring):
@@ -525,6 +526,10 @@ class WaResultsCollector(object):
         """
         Common helper for getting results to plot for a given metric
         """
+        lookup = (workload, metric, tag, kernel, test)
+        if lookup in self._metric_df_cache:
+            return self._metric_df_cache[lookup]
+
         df = self._select(tag, kernel, test)
         if df.empty:
             self._log.warn("No data to plot for (tag: %s, kernel: %s, test: %s)",
@@ -553,6 +558,7 @@ class WaResultsCollector(object):
             raise RuntimError('Found different units for workload "{}" metric "{}": {}'
                               .format(workload, metric, units))
 
+        self._metric_df_cache[lookup] = df
         return df
--
GitLab
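
The memoization added in PATCH 2/8 follows a simple pattern: key a dict on
the query arguments and return the cached frame on a repeat call. Below is a
standalone sketch of that pattern; the class, method and column names are
illustrative only, not the real wa_results_collector API.

import pandas as pd

class MetricSource(object):
    """Illustrative stand-in for a results collector; not the real class."""

    def __init__(self, results_df):
        self.results_df = results_df
        # Maps (workload, metric) -> previously computed DataFrame
        self._metric_df_cache = {}

    def get_metric_df(self, workload, metric):
        lookup = (workload, metric)
        if lookup in self._metric_df_cache:
            # Repeat query: skip the filtering below entirely
            return self._metric_df_cache[lookup]

        df = self.results_df[(self.results_df['workload'] == workload) &
                             (self.results_df['metric'] == metric)]
        self._metric_df_cache[lookup] = df
        return df

The trade-off is the one the commit message notes: repeat queries become
near-free, but every distinct argument tuple keeps a DataFrame alive in RAM.
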
From bfa8b583f5d26f1440768fd2fbd6a79296faa62c Mon Sep 17 00:00:00 2001
From: Chris Redpath
Date: Mon, 22 Jan 2018 14:28:24 +0000
Subject: [PATCH 3/8] wltests/wa_collector: Improve wa directory loading performance

Instead of adding each job's results dataframe to the main dataframe as we
generate it, store them all in a list and add everything in one go. For me,
this greatly improved load performance.

Signed-off-by: Chris Redpath
---
 libs/utils/wa_results_collector.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py
index dd2d62b6d..bdcbc86f5 100644
--- a/libs/utils/wa_results_collector.py
+++ b/libs/utils/wa_results_collector.py
@@ -232,6 +232,7 @@ class WaResultsCollector(object):
         tag_map = {}
         test_map = {}
         job_dir_map = {}
+        extra_dfs = []
 
         for job in jobs:
             workload = job['workload_name']
@@ -301,7 +302,9 @@ class WaResultsCollector(object):
             extra_df.loc[:, 'tag'] = tag
             extra_df.loc[:, 'test'] = test
 
-            df = df.append(extra_df)
+            extra_dfs.append(extra_df)
+
+        df = df.append(extra_dfs)
 
         for iteration, job_ids in skipped_jobs.iteritems():
             self._log.warning("Skipped failed iteration %d for jobs:", iteration)
--
GitLab

From e8736832062adff9f73af073572e917427b72b6a Mon Sep 17 00:00:00 2001
From: Chris Redpath
Date: Mon, 22 Jan 2018 17:46:58 +0000
Subject: [PATCH 4/8] wltests/wa_result_collector: add cache for kernel sha1 lookups

Signed-off-by: Chris Redpath
---
 libs/utils/wa_results_collector.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py
index bdcbc86f5..bbcbdabc4 100644
--- a/libs/utils/wa_results_collector.py
+++ b/libs/utils/wa_results_collector.py
@@ -117,6 +117,7 @@ class WaResultsCollector(object):
         self._log = logging.getLogger('WaResultsCollector')
 
         self._metric_df_cache = {}
+        self._kernel_sha1_cache = {}
         if base_dir:
             base_dir = os.path.expanduser(base_dir)
             if not isinstance(wa_dirs, basestring):
@@ -491,9 +492,15 @@ class WaResultsCollector(object):
         """
         Find the SHA1 of the kernel that a WA3 run was run against
         """
+        if wa_dir in self._kernel_sha1_cache:
+            return self._kernel_sha1_cache[wa_dir]
+
         with open(os.path.join(wa_dir, '__meta', 'target_info.json')) as f:
             target_info = json.load(f)
-        return KernelVersion(target_info['kernel_release']).sha1
+
+        sha1 = KernelVersion(target_info['kernel_release']).sha1
+        self._kernel_sha1_cache[wa_dir] = sha1
+        return sha1
 
     def _select(self, tag='.*', kernel='.*', test='.*'):
         _df = self.results_df
--
GitLab

From c6600d5e911c565c62fbfd54ed4ed1db786a64f4 Mon Sep 17 00:00:00 2001
From: Chris Redpath
Date: Mon, 22 Jan 2018 17:48:37 +0000
Subject: [PATCH 5/8] wltests/wa_results_collector: remove df append in _get_extra_job_metrics

Instead of multiple df.append ops, collect the per-artifact dataframes in a
list and do a single append at the end.

Signed-off-by: Chris Redpath
---
 libs/utils/wa_results_collector.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py
index bbcbdabc4..55bb0d2db 100644
--- a/libs/utils/wa_results_collector.py
+++ b/libs/utils/wa_results_collector.py
@@ -430,11 +430,11 @@ class WaResultsCollector(object):
         """
         # return
         # value,metric,units
-        metrics_df = pd.DataFrame()
+        extra_metric_list = []
 
         artifacts = self._read_artifacts(job_dir)
         if self.parse_traces and 'trace-cmd-bin' in artifacts:
-            metrics_df = metrics_df.append(
+            extra_metric_list.append(
                 self._get_trace_metrics(artifacts['trace-cmd-bin']))
 
         if 'jankbench_results_csv' in artifacts:
             df = pd.read_csv(artifacts['jankbench_results_csv'])
             df.loc[:, 'metric'] = 'frame_total_duration'
             df.loc[:, 'units'] = 'ms'
 
-            metrics_df = metrics_df.append(df)
+            extra_metric_list.append(df)
 
         # WA's metrics model just exports overall energy metrics, not individual
         # samples. We're going to extend that with individual samples so if you
@@ -473,7 +473,7 @@ class WaResultsCollector(object):
                     df.loc[:, 'units'] = 'watts'
 
-                    metrics_df = metrics_df.append(df)
+                    extra_metric_list.append(df)
                 elif 'output_power' in df.columns and 'USB_power' in df.columns:
                     # Looks like this is from a Monsoon
                     # For monsoon the USB and device power are collected
@@ -484,9 +484,11 @@ class WaResultsCollector(object):
                     df.loc[:, 'metric'] = 'device_power_sample'
                     df.loc[:, 'units'] = 'watts'
 
-                    metrics_df = metrics_df.append(df)
-
-        return metrics_df
+                    extra_metric_list.append(df)
+        if len(extra_metric_list) > 0:
+            return pd.DataFrame().append(extra_metric_list)
+        else:
+            return pd.DataFrame()
 
     def _wa_get_kernel_sha1(self, wa_dir):
         """
--
GitLab
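
PATCH 3/8 and 5/8 both apply the same idea: DataFrame.append copies the
accumulated data every time it is called, so appending inside a loop gets
slower as the frame grows, while collecting the pieces in a list and
appending once does a single combine. A standalone sketch of the two
variants (function names are illustrative):

import pandas as pd

def combine_slow(frames):
    # Grows the result one piece at a time; each .append() copies
    # everything accumulated so far.
    df = pd.DataFrame()
    for frame in frames:
        df = df.append(frame)
    return df

def combine_fast(frames):
    # Collect first, then hand the whole list to a single append call.
    collected = []
    for frame in frames:
        collected.append(frame)
    return pd.DataFrame().append(collected)

On the pandas versions these patches target, DataFrame.append accepts a list
of frames; on pandas 2.0 and later, where append has been removed,
pd.concat(frames) is the equivalent single-shot combine.
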
From 20c036e20ccba80fa821d184533cc78595fbdae9 Mon Sep 17 00:00:00 2001
From: Chris Redpath
Date: Mon, 22 Jan 2018 17:49:50 +0000
Subject: [PATCH 6/8] wltests/wa_results_collector: Add a cache to _select results

Signed-off-by: Chris Redpath
---
 libs/utils/wa_results_collector.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py
index 55bb0d2db..96ab951c3 100644
--- a/libs/utils/wa_results_collector.py
+++ b/libs/utils/wa_results_collector.py
@@ -118,6 +118,7 @@ class WaResultsCollector(object):
 
         self._metric_df_cache = {}
         self._kernel_sha1_cache = {}
+        self._select_cache = {}
         if base_dir:
             base_dir = os.path.expanduser(base_dir)
             if not isinstance(wa_dirs, basestring):
@@ -505,10 +506,15 @@ class WaResultsCollector(object):
         return sha1
 
     def _select(self, tag='.*', kernel='.*', test='.*'):
+        key = (tag, kernel, test)
+        if key in self._select_cache:
+            return self._select_cache[key]
+
         _df = self.results_df
         _df = _df[_df.tag.str.contains(tag)]
         _df = _df[_df.kernel.str.contains(kernel)]
         _df = _df[_df.test.str.contains(test)]
+        self._select_cache[key] = _df
         return _df
 
     @property
--
GitLab

From 5636d07a718d9bd8107579dc39a46364a4ee8f3b Mon Sep 17 00:00:00 2001
From: Chris Redpath
Date: Mon, 22 Jan 2018 17:51:03 +0000
Subject: [PATCH 7/8] wltests/wa_results_collector: Add a cache to tests()

We can spend 3-4s in every 'for test in collector.tests()' loop in our
notebook. Let's add a cache to minimise this time.

Signed-off-by: Chris Redpath
---
 libs/utils/wa_results_collector.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py
index 96ab951c3..c86a586da 100644
--- a/libs/utils/wa_results_collector.py
+++ b/libs/utils/wa_results_collector.py
@@ -119,6 +119,8 @@ class WaResultsCollector(object):
         self._metric_df_cache = {}
         self._kernel_sha1_cache = {}
         self._select_cache = {}
+        self._tests = {}
+
         if base_dir:
             base_dir = os.path.expanduser(base_dir)
             if not isinstance(wa_dirs, basestring):
@@ -530,10 +532,13 @@ class WaResultsCollector(object):
         return self.results_df['tag'].unique()
 
     def tests(self, workload=None):
-        df = self.results_df
         if workload:
-            df = df[df['workload'] == workload]
-        return df['test'].unique()
+            if workload not in self._tests:
+                df = self.results_df[self.results_df['workload'] == workload]
+                self._tests[workload] = df['test'].unique()
+            return self._tests[workload]
+
+        return self.results_df['test'].unique()
 
     def workload_available_metrics(self, workload):
         return (self.results_df
--
GitLab
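
The caches added in PATCH 6/8 and 7/8 are hand-rolled dicts keyed on the
call arguments, which is the natural form for this Python 2 code base (note
the basestring and iteritems usage above). On Python 3 the same per-argument
caching can be expressed with functools.lru_cache; a minimal sketch with
illustrative names, not the real collector class:

from functools import lru_cache  # Python 3.2+

class Selector(object):
    def __init__(self, results_df):
        self.results_df = results_df

    @lru_cache(maxsize=None)
    def select(self, tag='.*', kernel='.*', test='.*'):
        # The arguments are plain strings, so they form a hashable cache key.
        # Note: lru_cache on a method also keys on (and holds a reference to)
        # self.
        df = self.results_df
        df = df[df.tag.str.contains(tag)]
        df = df[df.kernel.str.contains(kernel)]
        return df[df.test.str.contains(test)]

Either way the caveat is the same: callers get the shared cached DataFrame
back, so they should treat it as read-only.
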
From 8dee82cc2a8a34c04ea6390f4484e7aac6c69642 Mon Sep 17 00:00:00 2001
From: Chris Redpath
Date: Mon, 22 Jan 2018 17:52:19 +0000
Subject: [PATCH 8/8] wltests/wa_results_collector: df.append only once when loading wa_dirs

The wa_dir loading loop calls df.append repeatedly. It's more efficient to
call it once with a list of dfs.

Signed-off-by: Chris Redpath
---
 libs/utils/wa_results_collector.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libs/utils/wa_results_collector.py b/libs/utils/wa_results_collector.py
index c86a586da..a2e4e9c1c 100644
--- a/libs/utils/wa_results_collector.py
+++ b/libs/utils/wa_results_collector.py
@@ -146,9 +146,11 @@ class WaResultsCollector(object):
         self.use_cached_trace_metrics = use_cached_trace_metrics
 
         df = pd.DataFrame()
+        df_list = []
         for wa_dir in wa_dirs:
             self._log.info("Reading wa_dir %s", wa_dir)
-            df = df.append(self._read_wa_dir(wa_dir))
+            df_list.append(self._read_wa_dir(wa_dir))
+        df = df.append(df_list)
 
         kernel_refs = {}
         if kernel_repo_path:
--
GitLab
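
A quick way to sanity-check the kind of speedup PATCH 3/8, 5/8 and 8/8 are
after is to time the loop-append and single-append variants side by side.
The snippet below is only an illustration with synthetic frames, not wltests
data:

import timeit

import pandas as pd

# DataFrame.append exists on the pandas versions contemporary with these
# patches; on pandas >= 2.0 use pd.concat instead.
frames = [pd.DataFrame({'value': range(1000)}) for _ in range(200)]

def append_in_loop():
    df = pd.DataFrame()
    for frame in frames:
        df = df.append(frame)
    return df

def append_once():
    # Single call with the whole list of frames
    return pd.DataFrame().append(frames)

print("loop append:   %.2fs" % timeit.timeit(append_in_loop, number=3))
print("single append: %.2fs" % timeit.timeit(append_once, number=3))
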