From 93dc9955266e7d974adfcd2cba4151af2ce6df33 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Wed, 31 May 2017 11:39:01 +0100 Subject: [PATCH 1/3] Update some code for Pandas v0.20.1 This Pandas release changes the returned format of groupby.describe [1]. Update some code snippets to handle this change. Essentially, we no longer need to call `unstack` on the result. In the tutorial notebook, assume the user has just installed LISA and so has the latest pandas, and just remove the unstack call (currently in commented code) completely to keep things simple. In the Jankbench notebook, add a comment so that existing users who have not updated their pandas have a line to uncomment if their notebook stops working after they pull in this patch (which they probably won't do). In library code, try to work with both output formats. Ideally we should have a proper solution for dependency management but I don't think any contributors have the required knowledge of Python distribution "best practices" at the moment, and I don't think it's worth spending the time to learn right now. 
[1] http://pandas.pydata.org/pandas-docs/version/0.20/whatsnew.html#groupby-describe-formatting --- .../android/benchmarks/Android_Jankbench.ipynb | 5 ++++- ipynb/tutorial/00_LisaInANutshell.ipynb | 1 - libs/utils/analysis/tasks_analysis.py | 12 ++++++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ipynb/examples/android/benchmarks/Android_Jankbench.ipynb b/ipynb/examples/android/benchmarks/Android_Jankbench.ipynb index 9dab2491c..88347cca0 100644 --- a/ipynb/examples/android/benchmarks/Android_Jankbench.ipynb +++ b/ipynb/examples/android/benchmarks/Android_Jankbench.ipynb @@ -399,7 +399,10 @@ "def overall_statistics(df):\n", " byname_test = df.groupby(['name','test']).total_duration.describe(percentiles=[0.9, 0.95, 0.99])\n", " stats = pd.DataFrame(byname_test)\n", - " stats = stats.unstack()\n", + " # If using old Pandas, convert GroupBy.describe format to new version\n", + " # http://pandas.pydata.org/pandas-docs/version/0.20/whatsnew.html#groupby-describe-formatting\n", + " if 'count' not in stats.columns:\n", + " stats = stats.unstack()\n", " return stats\n", "\n", "stats = overall_statistics(df)\n", diff --git a/ipynb/tutorial/00_LisaInANutshell.ipynb b/ipynb/tutorial/00_LisaInANutshell.ipynb index d91c10412..2d117c0d9 100644 --- a/ipynb/tutorial/00_LisaInANutshell.ipynb +++ b/ipynb/tutorial/00_LisaInANutshell.ipynb @@ -2060,7 +2060,6 @@ "\n", "# # Group sched_switch event by task switching into the CPU\n", "# df = df.groupby('next_pid').describe(include=['object'])\n", - "# df = df.unstack()\n", "\n", "# # Sort sched_switch events by number of time a task switch into the CPU\n", "# df = df['next_comm'].sort_values(by=['count'], ascending=False)\n", diff --git a/libs/utils/analysis/tasks_analysis.py b/libs/utils/analysis/tasks_analysis.py index 42beae6a2..534f8e649 100644 --- a/libs/utils/analysis/tasks_analysis.py +++ b/libs/utils/analysis/tasks_analysis.py @@ -20,6 +20,7 @@ import matplotlib.gridspec as gridspec import matplotlib.pyplot 
as plt import numpy as np +import pandas as pd import pylab as pl import re @@ -77,10 +78,13 @@ class TasksAnalysis(AnalysisModule): len(big_tasks), min_utilization) # Compute number of samples above threshold - big_tasks_stats = big_tasks_events.groupby('pid')\ - .describe(include=['object']) - big_tasks_stats = big_tasks_stats.unstack()['comm']\ - .sort_values(by=['count'], ascending=False) + desc = big_tasks_events.groupby('pid').describe(include=['object']) + if isinstance(desc.index, pd.MultiIndex): + # We must be running on a pre-0.20.0 version of pandas. + # unstack will convert the old output format to the new. + # http://pandas.pydata.org/pandas-docs/version/0.20/whatsnew.html#groupby-describe-formatting + desc = desc.unstack() + big_tasks_stats = desc['comm'].sort_values(by=['count'], ascending=False) # Filter for number of occurrences big_tasks_stats = big_tasks_stats[big_tasks_stats['count'] > min_samples] -- GitLab From fcc140b3d6aeffcb9655d9a79ace3c21323e0825 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 30 May 2017 15:43:31 +0100 Subject: [PATCH 2/3] travis: Add LISA tools/x86_64 to $PATH for trace-cmd Travis uses Ubuntu 12.04 (Precise Pangolin), whose trace-cmd is very old and doesn't work with TRAPpy. Just use the static executable we include in LISA instead. 
--- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index fd4a0c04e..5d071be42 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,4 +22,5 @@ install: script: - cd $TRAVIS_BUILD_DIR - 'echo backend : Agg > matplotlibrc' # Otherwise it tries to use tkinter + - export PATH=$TRAVIS_BUILD_DIR/tools/x86_64/:$PATH # For trace-cmd - source init_env && lisa-test tests/lisa/ -- GitLab From e518a31314459c69576f3f5b9ca5c5e2d30abc54 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Wed, 6 Dec 2017 16:26:53 +0000 Subject: [PATCH 3/3] tasks_analysis: Disable plotting cluster residency when no data _plotTaskResidencies is big.LITTLE specific, disable it if we don't have bL data. --- libs/utils/analysis/tasks_analysis.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/libs/utils/analysis/tasks_analysis.py b/libs/utils/analysis/tasks_analysis.py index 534f8e649..8faf169a3 100644 --- a/libs/utils/analysis/tasks_analysis.py +++ b/libs/utils/analysis/tasks_analysis.py @@ -311,17 +311,21 @@ class TasksAnalysis(AnalysisModule): signals_to_plot = {'residencies'} signals_to_plot = list(signals_to_plot.intersection(signals)) if len(signals_to_plot) > 0: - axes = plt.subplot(gs[plot_id, 0]) - axes.set_title( - 'Task [{0:d}:{1:s}] Residency (green: LITTLE, red: big)' - .format(tid, task_name) - ) - plot_id = plot_id + 1 - is_last = (plot_id == plots_count) - if 'sched_overutilized' in signals: - signals_to_plot.append('sched_overutilized') - self._plotTaskResidencies(axes, tid, signals_to_plot, is_last) - savefig = True + if not self._trace.has_big_little: + self._log.warning( + 'No big.LITTLE platform data, residencies plot disabled') + else: + axes = plt.subplot(gs[plot_id, 0]) + axes.set_title( + 'Task [{0:d}:{1:s}] Residency (green: LITTLE, red: big)' + .format(tid, task_name) + ) + plot_id = plot_id + 1 + is_last = (plot_id == plots_count) + if 'sched_overutilized' in signals: + 
signals_to_plot.append('sched_overutilized') + self._plotTaskResidencies(axes, tid, signals_to_plot, is_last) + savefig = True # Plot PELT signals signals_to_plot = {'load_sum', 'util_sum', 'period_contrib'} -- GitLab