From d603a604e9a1f9760db7e85dee78f3e4140b6a43 Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Tue, 29 Jan 2019 15:29:17 +0000
Subject: [PATCH 01/26] lisa: utils: remove warning about LISA_HOME not set

Catching that at such a high level triggers the warning even when it is
completely irrelevant, such as when generating the documentation. If the
env var is not set, the Python variable will be None and code making use
of it as a string will fail anyway, so there is no risk of a silent
issue.
---
 lisa/utils.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/lisa/utils.py b/lisa/utils.py
index 7af99785b..426f9fac2 100644
--- a/lisa/utils.py
+++ b/lisa/utils.py
@@ -46,9 +46,6 @@ LISA_HOME = os.getenv('LISA_HOME')
 The detected location of your LISA installation
 """
 
-if not LISA_HOME:
-    logging.getLogger(__name__).warning('LISA_HOME env var is not set, LISA may misbehave.')
-
 class Loggable:
     """
     A simple class for uniformly named loggers
-- 
GitLab

From a79c439d264f91f3fc6c9853021fb7e9748bdb6b Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Wed, 23 Jan 2019 15:33:52 +0000
Subject: [PATCH 02/26] exekall: warn about partial annotations

Add a DEBUG (or INFO with --verbose) log entry to warn the user about
partial annotations of callables. Along with the warning about a class
being abstract, that should help understand why a testcase is not
showing up when it is expected to.
---
 tools/exekall/exekall/engine.py | 15 +++++++++++++--
 tools/exekall/exekall/utils.py  | 25 ++++++++++++++++++++---
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py
index 512b01afc..2c62db473 100644
--- a/tools/exekall/exekall/engine.py
+++ b/tools/exekall/exekall/engine.py
@@ -1456,6 +1456,9 @@ class Expression(ExpressionBase):
 class AnnotationError(Exception):
     pass
 
+class PartialAnnotationError(AnnotationError):
+    pass
+
 class ForcedParamType:
     pass
 
@@ -1730,6 +1733,7 @@ class Operator:
         sig = self.signature
         first_param = utils.take_first(sig.parameters)
         annotation_map = utils.resolve_annotations(self.annotations, self.callable_globals)
+        pristine_annotation_map = copy.copy(annotation_map)
         extra_ignored_param = set()
 
         # If it is a class
@@ -1771,9 +1775,16 @@ class Operator:
                 param not in extra_ignored_param and
                 param not in self.ignored_param
             ):
-                raise AnnotationError('Missing annotation for "{param}" parameters of operator "{op}"'.format(
+                # If some parameters are annotated but not all, we raise a
+                # slightly different exception to allow better reporting
+                if pristine_annotation_map:
+                    excep_cls = PartialAnnotationError
+                else:
+                    excep_cls = AnnotationError
+
+                raise excep_cls('Missing annotation for "{param}" parameters of operator "{op}"'.format(
                     param = param,
-                    op = self.callable_,
+                    op = self.name,
                 ))
 
     # Iterate over keys and values of "mapping" in the same order as "keys"
diff --git a/tools/exekall/exekall/utils.py b/tools/exekall/exekall/utils.py
index caedc0340..f26971e36 100644
--- a/tools/exekall/exekall/utils.py
+++ b/tools/exekall/exekall/utils.py
@@ -33,18 +33,30 @@ def get_callable_set(module_set, verbose=False):
         module.__package__.split('.', 1)[0]
         for module in module_set
     }
     callable_set = set()
+    visited_obj_set = set()
     for module in get_recursive_module_set(module_set, package_set):
-        callable_set.update(_get_callable_set(module, verbose=verbose))
+        callable_set.update(_get_callable_set(
+            module,
+            visited_obj_set,
+            verbose=verbose,
+        ))
 
     return callable_set
 
-def _get_callable_set(module, verbose):
+def _get_callable_set(module, visited_obj_set, verbose):
+    log_f = info if verbose else debug
     callable_pool = set()
+
     for name, obj in vars(module).items():
         # skip internal classes that may end up being exposed as a global
         if inspect.getmodule(obj) is engine:
             continue
 
+        if id(obj) in visited_obj_set:
+            continue
+        else:
+            visited_obj_set.add(id(obj))
+
         # If it is a class, get the list of methods
         if isinstance(obj, type):
             callable_list = list(dict(inspect.getmembers(obj, predicate=callable)).values())
@@ -57,6 +69,14 @@ def _get_callable_set(module, verbose):
         for callable_ in callable_list:
             try:
                 param_list, return_type = engine.Operator(callable_).get_prototype()
+            # If the callable is partially annotated, warn about it since it is
+            # likely to be a mistake.
+            except engine.PartialAnnotationError as e:
+                log_f('Partially-annotated callable "{callable}" will not be used: {e}'.format(
+                    callable=get_name(callable_),
+                    e=e,
+                ))
+                continue
             # If something goes wrong, that means it is not properly annotated
             # so we just ignore it
             except (AttributeError, ValueError, KeyError, engine.AnnotationError):
@@ -66,7 +86,6 @@ def _get_callable_set(module, verbose):
             # return a abstract base class instance, since that would not work
             # anyway.
             if inspect.isabstract(return_type):
-                log_f = info if verbose else debug
                 log_f('Instances of {} will not be created since it has non-implemented abstract methods'.format(
                     get_name(return_type, full_qual=True)
                 ))
-- 
GitLab

From 93f723d94ffde51cc04e0056837d8b7bc12d0f72 Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Tue, 22 Jan 2019 15:37:31 +0000
Subject: [PATCH 03/26] exekall: Fix FrozenExprVal.get_id()

Make get_id() follow the same convention as utils.get_name(): disabling
qual also disables full_qual, since a fully qualified ID is a superset
of the qualified one.
---
 tools/exekall/exekall/engine.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py
index 2c62db473..6ecf784af 100644
--- a/tools/exekall/exekall/engine.py
+++ b/tools/exekall/exekall/engine.py
@@ -2168,6 +2168,7 @@ class FrozenExprVal(ExprValBase):
         return tuple(sorted(kwargs.items()))
 
     def get_id(self, full_qual=True, qual=True, with_tags=True):
+        full_qual = full_qual and qual
         key = self._make_id_key(
             full_qual=full_qual,
             qual=qual,
-- 
GitLab

From 8515beaaea7b692b4472a910ecfd88cd620b3c0e Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Tue, 22 Jan 2019 15:35:56 +0000
Subject: [PATCH 04/26] exekall: save a text traceback in FrozenExprVal

Since traceback objects cannot be serialized, keep a text version of
them.
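
As a rough illustration of the limitation (a sketch, not part of this
patch, assuming a stock CPython), a traceback object cannot round-trip
through pickle, while its formatted text can:

    import pickle
    import sys
    import traceback

    try:
        1 / 0
    except ZeroDivisionError:
        excep = sys.exc_info()[1]

    # This would raise TypeError, since traceback objects reference
    # stack frames and cannot be pickled:
    # pickle.dumps(excep.__traceback__)

    # The formatted text serializes just fine:
    excep_tb = ''.join(traceback.format_exception(
        type(excep), excep, excep.__traceback__,
    ))
    assert pickle.loads(pickle.dumps(excep_tb)) == excep_tb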
--- tools/exekall/exekall/engine.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py index 6ecf784af..4b9fffa15 100644 --- a/tools/exekall/exekall/engine.py +++ b/tools/exekall/exekall/engine.py @@ -2105,6 +2105,11 @@ class FrozenExprVal(ExprValBase): self.recorded_id_map = recorded_id_map super().__init__(param_map=param_map, value=value, excep=excep) + if self.excep is not NoValue: + self.excep_tb = utils.format_exception(self.excep) + else: + self.excep_tb = None + @property def type_names(self): return [ -- GitLab From 835a700d4956ce83678219ba1d5cd53c6ee404a1 Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Tue, 22 Jan 2019 16:22:35 +0000 Subject: [PATCH 05/26] bisector: small cleanup --- tools/bisector/bisector/bisector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bisector/bisector/bisector.py b/tools/bisector/bisector/bisector.py index 69908e3a6..cb440fd87 100755 --- a/tools/bisector/bisector/bisector.py +++ b/tools/bisector/bisector/bisector.py @@ -4050,7 +4050,7 @@ class Report(Serializable): excep = import_steps_from_yaml(steps_path) # Then try to import the files as recorded in the report else: - excep = import_files(preamble.src_files) + excep = import_files(src_files) return excep -- GitLab From 0fae7f7fc720875f68b749b4a69808e9b3bc5732 Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Tue, 22 Jan 2019 15:38:12 +0000 Subject: [PATCH 06/26] exekall: remove unused parameter --- tools/exekall/exekall/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/exekall/exekall/_utils.py b/tools/exekall/exekall/_utils.py index 66c7ee259..f22c05e9c 100644 --- a/tools/exekall/exekall/_utils.py +++ b/tools/exekall/exekall/_utils.py @@ -82,7 +82,7 @@ def get_mro(cls): assert isinstance(cls, type) return inspect.getmro(cls) -def get_name(obj, full_qual=True, qual=True, desugar_cls_meth=False): +def get_name(obj, full_qual=True, qual=True): # full_qual enabled implies qual enabled _qual = qual or full_qual # qual disabled implies full_qual disabled -- GitLab From d4be6d0f1c183b9bc2bf98247b7bd7bf2e999bd5 Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Mon, 21 Jan 2019 15:52:58 +0000 Subject: [PATCH 07/26] exekall: allow pruning ValueDB --- tools/exekall/exekall/engine.py | 56 +++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py index 4b9fffa15..63ad8e04f 100644 --- a/tools/exekall/exekall/engine.py +++ b/tools/exekall/exekall/engine.py @@ -246,6 +246,49 @@ class ValueDB: else: return froz_val_set_set + def prune_by_predicate(self, predicate): + def prune(froz_val): + if isinstance(froz_val, PrunedFrozVal): + return froz_val + elif predicate(froz_val): + return PrunedFrozVal(froz_val) + else: + # Edit the param_map in-place, so we keep it potentially shared + # if possible. + for param, param_froz_val in list(froz_val.param_map.items()): + froz_val.param_map[param] = prune(param_froz_val) + + return froz_val + + def make_froz_val_seq(froz_val_seq): + froz_val_list = [ + prune(froz_val) + for froz_val in froz_val_seq + # Just remove the root PrunedFrozVal, since they are useless at + # this level (i.e. 
nothing depends on them) + if not predicate(froz_val) + ] + + # All param_map will be the same in the list by construction + try: + param_map = froz_val_list[0].param_map + except IndexError: + param_map = {} + + return FrozenExprValSeq( + froz_val_list=froz_val_list, + param_map=param_map, + ) + + return self.__class__( + froz_val_seq_list=[ + make_froz_val_seq(froz_val_seq) + # That will keep proper inter-object references as in the + # original graph of objects + for froz_val_seq in copy.deepcopy(self.froz_val_seq_list) + ] + ) + def get_all(self, **kwargs): return self.get_by_predicate(lambda froz_val: True, **kwargs) @@ -265,6 +308,7 @@ class ValueDB: return self.get_by_predicate(predicate, **kwargs) + class ScriptValueDB: def __init__(self, db, var_name='db'): self.db = db @@ -2181,6 +2225,18 @@ class FrozenExprVal(ExprValBase): ) return self.recorded_id_map[key] +class PrunedFrozVal(FrozenExprVal): + def __init__(self, froz_val): + super().__init__( + param_map={}, + value=NoValue, + excep=NoValue, + uuid=froz_val.uuid, + callable_qualname=froz_val.callable_qualname, + callable_name=froz_val.callable_name, + recorded_id_map=copy.copy(froz_val.recorded_id_map), + ) + class FrozenExprValSeq(collections.abc.Sequence): def __init__(self, froz_val_list, param_map): self.froz_val_list = froz_val_list -- GitLab From 314d4de986fa0afc6841b5f886923ae5f9361899 Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Tue, 22 Jan 2019 15:35:33 +0000 Subject: [PATCH 08/26] exekall: Add ValueDB.get_roots() --- tools/exekall/exekall/engine.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py index 63ad8e04f..e2c67df06 100644 --- a/tools/exekall/exekall/engine.py +++ b/tools/exekall/exekall/engine.py @@ -246,6 +246,16 @@ class ValueDB: else: return froz_val_set_set + def get_roots(self, flatten=True): + froz_val_set_set = { + frozenset(froz_val_seq) + for froz_val_seq in self.froz_val_seq_list + } + if flatten: + return set(utils.flatten_seq(froz_val_set_set)) + else: + return froz_val_set_set + def prune_by_predicate(self, predicate): def prune(froz_val): if isinstance(froz_val, PrunedFrozVal): -- GitLab From 19197c3a4cc5311233388bce212c34cedc4e1f83 Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Tue, 22 Jan 2019 15:34:34 +0000 Subject: [PATCH 09/26] lisa: change installation order Prepare the ground for following commits, which may need exekall installed before LISA so the dependencies are satisfied. --- devmode_requirements.txt | 2 +- doc/doc_requirements.txt | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devmode_requirements.txt b/devmode_requirements.txt index 722b259d2..647fd5d89 100644 --- a/devmode_requirements.txt +++ b/devmode_requirements.txt @@ -1,6 +1,7 @@ # Must be ordered to satisfy the dependencies without pulling from PyPI. # Once they are found by pip in editable mode as specified here, they will be # used and not looked up on PyPI. 
+-e ./tools/exekall
 
 # devlib before WA and LISA
 -e ./external/devlib/
@@ -14,5 +15,4 @@
 -e ./external/workload-automation/
 
 -e ./[notebook,doc,test]
--e ./tools/exekall
 -e ./tools/bisector[dbus]
diff --git a/doc/doc_requirements.txt b/doc/doc_requirements.txt
index c789b6208..a93872b8f 100644
--- a/doc/doc_requirements.txt
+++ b/doc/doc_requirements.txt
@@ -1,4 +1,5 @@
 # A streamlined version of devmode_requirements.txt for doc building
+-e ./tools/exekall
 
 # devlib before WA and LISA
 -e ./external/devlib/
@@ -12,5 +13,4 @@
 -e ./external/workload-automation/
 
 -e ./[doc]
--e ./tools/exekall
 -e ./tools/bisector
diff --git a/requirements.txt b/requirements.txt
index aa56fac3c..ee4334a48 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-./[notebook,doc,test]
 ./tools/exekall
+./[notebook,doc,test]
 ./tools/bisector[dbus]
-- 
GitLab

From 0fae7f7fc720875f68b749b4a69808e9b3bc5732 Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Tue, 22 Jan 2019 15:43:53 +0000
Subject: [PATCH 10/26] bisector/exekall: drop xunit support

Now use exekall's ValueDB directly. This gives access to the full
ResultBundle in bisector, and removes some XML-related glue code.

Since the ValueDB is a tad big when the iteration count grows, only the
root values and exceptions are kept by default. This can be overridden
with the "prune-db" option of the exekall-LISA-test step class, if the
user wants to get a full DB when using -oexport-db.
---
 lisa/exekall_customize.py           | 139 +----------------
 tools/bisector/bisector/bisector.py | 233 +++++++++++++++++-----------
 2 files changed, 147 insertions(+), 225 deletions(-)

diff --git a/lisa/exekall_customize.py b/lisa/exekall_customize.py
index 59cd1c014..874fce276 100644
--- a/lisa/exekall_customize.py
+++ b/lisa/exekall_customize.py
@@ -16,26 +16,23 @@
 # limitations under the License.
# +import argparse import contextlib import itertools -import inspect -import functools -import logging -import sys +import re import os.path from pathlib import Path -import xml.etree.ElementTree as ET -import traceback +from collections import OrderedDict, namedtuple from lisa.env import TestEnv, TargetConf from lisa.platforms.platinfo import PlatformInfo from lisa.utils import HideExekallID, Loggable, ArtifactPath, get_subclasses, groupby, Serializable from lisa.conf import MultiSrcConf -from lisa.tests.base import TestBundle, Result, ResultBundle, CannotCreateError +from lisa.tests.base import TestBundle from lisa.tests.scheduler.load_tracking import FreqInvarianceItem -from exekall.utils import info, get_name, get_mro, NoValue -from exekall.engine import ExprData, Consumer, PrebuiltOperator, ValueDB +from exekall.utils import get_name +from exekall.engine import ExprData, Consumer, PrebuiltOperator from exekall.customization import AdaptorBase class NonReusable: @@ -243,129 +240,5 @@ class LISAAdaptor(AdaptorBase): return tags - def get_summary(self, result_map): - summary = super().get_summary(result_map) - - # The goal is to implement something that is roughly compatible with: - # https://github.com/jenkinsci/xunit-plugin/blob/master/src/main/resources/org/jenkinsci/plugins/xunit/types/model/xsd/junit-10.xsd - # This way, Jenkins should be able to read it, and other tools as well - - xunit_path = self.args.artifact_dir.joinpath('xunit.xml') - hidden_callable_set = { - op.callable_ - for op in self.hidden_op_set - } - et_root = self._create_xunit(result_map, hidden_callable_set) - et_tree = ET.ElementTree(et_root) - info('Writing xUnit file at: ' + str(xunit_path)) - et_tree.write(str(xunit_path)) - return summary - - def _create_xunit(self, result_map, hidden_callable_set): - et_testsuites = ET.Element('testsuites') - - testcase_list = list(result_map.keys()) - # We group by module in which the root operators are defined. There will - # be one testsuite for each such module. - def key(expr): - return expr.op.mod_name - - # One testsuite per module where a root operator is defined - for mod_name, group in groupby(testcase_list, key=key): - testcase_list = list(group) - et_testsuite = ET.SubElement(et_testsuites, 'testsuite', attrib=dict( - name = mod_name - )) - testsuite_counters = dict(failures=0, errors=0, tests=0, skipped=0) - - for testcase in testcase_list: - artifact_path = testcase.data.get('artifact_dir', None) - - # If there is more than one value for a given expression, we - # assume that they testcase will have unique names using tags - expr_val_list = result_map[testcase] - for expr_val in expr_val_list: - - # Get the set of UUIDs of all TestBundle instances that were - # involved in the testcase. 
- def bundle_predicate(expr_val): - return issubclass(expr_val.expr.op.value_type, TestBundle) - bundle_uuid_set = { - expr_val.uuid - for expr_val in expr_val.get_by_predicate(bundle_predicate) - } - bundle_uuid_set.discard(None) - - et_testcase = ET.SubElement(et_testsuite, 'testcase', dict( - name = expr_val.get_id( - full_qual=False, - qual=False, - with_tags=True, - hidden_callable_set=hidden_callable_set, - ), - # This may help locating the artifacts, even though it - # will only be valid on the machine it was produced on - artifact_path=str(artifact_path), - bundle_uuids=','.join(sorted(bundle_uuid_set)), - )) - testsuite_counters['tests'] += 1 - - for failed_expr_val in expr_val.get_excep(): - excep = failed_expr_val.excep - # When one critical object cannot be created, we assume - # the test was skipped. - if isinstance(excep, CannotCreateError): - result = 'skipped' - testsuite_counters['skipped'] += 1 - else: - result = 'error' - testsuite_counters['errors'] += 1 - - short_msg = str(excep) - msg = ''.join(traceback.format_exception(type(excep), excep, excep.__traceback__)) - type_ = type(excep) - - _append_result_tag(et_testcase, result, type_, short_msg, msg) - - value = expr_val.value - if isinstance(value, ResultBundle): - result = RESULT_TAG_MAP[value.result] - short_msg = value.result.lower_name - msg = str(value) - type_ = type(value) - - _append_result_tag(et_testcase, result, type_, short_msg, msg) - if value.result is Result.FAILED: - testsuite_counters['failures'] += 1 - - et_testsuite.attrib.update( - (k, str(v)) for k, v in testsuite_counters.items() - ) - - return et_testsuites - # Expose it as a module-level name load_db = LISAAdaptor.load_db - -def _append_result_tag(et_testcase, result, type_, short_msg, msg): - et_result = ET.SubElement(et_testcase, result, dict( - type=get_name(type_, full_qual=True), - type_bases=','.join( - get_name(type_, full_qual=True) - for type_ in get_mro(type_) - ), - message=str(short_msg), - )) - et_result.text = str(msg) - return et_result - -RESULT_TAG_MAP = { - # "passed" is an extension to xUnit format that we add for parsing - # convenience - Result.PASSED: 'passed', - Result.FAILED: 'failure', - # This tag is not part of xUnit format but necessary for our reporting - Result.UNDECIDED: 'undecided' -} -# Make sure we cover all cases -assert set(RESULT_TAG_MAP.keys()) == set(Result) diff --git a/tools/bisector/bisector/bisector.py b/tools/bisector/bisector/bisector.py index cb440fd87..be0b54852 100755 --- a/tools/bisector/bisector/bisector.py +++ b/tools/bisector/bisector/bisector.py @@ -1713,10 +1713,12 @@ class LISATestStepResult(StepResult): name = 'LISA-test', ) - def __init__(self, xunit, results_path, *args, **kwargs): - super().__init__(*args, **kwargs) + # TODO: get rid of xunit and only use db + def __init__(self, results_path, db=None, xunit=None, **kwargs): + super().__init__(**kwargs) self.xunit = xunit self.results_path = results_path + self.db = db def is_excep_ignored(excep, ignored_except_set): # If the type is missing, assume a generic Exception @@ -1759,7 +1761,7 @@ def deprecated_parse_testcase_res(testcase, kind, ignored_except_set): class ExekallLISATestStep(ShellStep): """ - Execute an exekall LISA test command and collect xUnit results. Also + Execute an exekall LISA test command and collect exekall's ValueDB. Also compress the result directory and record its path. It will also define some environment variables that are expected to be used by the command to be able to locate resources to collect. 
@@ -1772,12 +1774,14 @@ class ExekallLISATestStep(ShellStep):
         use_systemd_run = False,
         compress_artifact = True,
         upload_artifact = False,
+        prune_db = True,
     )
 
     options = dict(
         __init__ = dict(
             compress_artifact = BoolParam('compress the exekall artifact directory in an archive'),
             upload_artifact = BoolParam('upload the exekall artifact directory to Artifactorial as the execution goes, and delete the local archive.'),
+            prune_db = BoolParam("Prune exekall's ValueDB so that only root values are preserved. That allows smaller reports that are faster to load"),
             # Some options are not supported
             **filter_keys(StepBase.options['__init__'], remove={'trials'}),
         ),
@@ -1790,13 +1794,13 @@ class ExekallLISATestStep(ShellStep):
             show_pass_rate = BoolParam('always show the pass rate of tests, even when there are failures or crashes as well'),
             show_details = ChoiceOrBoolParam(['msg'], 'show details of results. Use "msg" for only a brief message'),
             show_artifact_dirs = BoolParam('show exekall artifact directory for all iterations'),
-            testcase = CommaListParam('show only the test cases matching one of the patterns in the comma-separated list. * can be used to match any part of the name.'),
+            testcase = CommaListParam('show only the test cases matching one of the patterns in the comma-separated list. * can be used to match any part of the name'),
             ignore_testcase = CommaListParam('completely ignore test cases matching one of the patterns in the comma-separated list. * can be used to match any part of the name.'),
             ignore_non_issue = BoolParam('consider only tests that failed'),
             ignore_excep = CommaListParam('ignore the given comma-separated list of exceptions that caused tests failure or error. * can be used to match any part of the name'),
             dump_artifact_dirs = BoolOrStrParam('write the list of exekall artifact directories to a file. Useful to implement garbage collection of unreferenced artifact archives'),
-            xunit2json = BoolOrStrParam('append consolidated xUnit information to a JSON file'),
-            export_logs = BoolOrStrParam('export the logs, xUnit file and artifact directory symlink to the given directory'),
+            export_db = BoolOrStrParam('export a merged exekall ValueDB, merging it with existing ValueDB if the file exists', allow_empty=False),
+            export_logs = BoolOrStrParam('export the logs and artifact directory symlink to the given directory'),
             download = BoolParam('Download the exekall artifact archives if necessary'),
             upload_artifact = BoolParam('upload the artifact directory to Artifactorial and update the in-memory report. Following env var are needed: ARTIFACTORIAL_FOLDER set to the folder URL and ARTIFACTORIAL_TOKEN.
Note: --export should be used to save the report with updated paths'), ) @@ -1806,6 +1810,7 @@ class ExekallLISATestStep(ShellStep): def __init__(self, compress_artifact = Default, upload_artifact = Default, + prune_db = Default, **kwargs ): kwargs['trials'] = 1 @@ -1818,8 +1823,12 @@ class ExekallLISATestStep(ShellStep): compress_artifact = True self.compress_artifact = compress_artifact + self.prune_db = prune_db def run(self, i_stack, service_hub): + from exekall.utils import NoValue + from exekall.engine import ValueDB + artifact_path = os.getenv( 'EXEKALL_ARTIFACT_ROOT', # default value @@ -1827,7 +1836,9 @@ class ExekallLISATestStep(ShellStep): ) # Create a unique artifact dir - artifact_path = os.path.join(artifact_path, uuid.uuid4().hex) + date = datetime.datetime.now().strftime('%Y%m%d_%H:%M:%S') + name = '{}_{}'.format(date, uuid.uuid4().hex) + artifact_path = os.path.join(artifact_path, name) # This also strips the trailing /, which is needed later on when # archiving the artifact. @@ -1847,29 +1858,31 @@ class ExekallLISATestStep(ShellStep): else: bisect_ret = BisectRet.GOOD - # First item is the oldest created file - xunit_path_list = sorted( - glob.glob(os.path.join(artifact_path, '**/xunit.xml'), recursive=True), - key=lambda x: os.path.getmtime(x) - ) - xunit_report = '' - if xunit_path_list: - # Take the most recent file - xunit_path = xunit_path_list[-1] - if len(xunit_path_list) > 1: - warn('Found more than one xUnit file, only taking the most recent one ({}): {}'.format( - xunit_path, - ', '.join(xunit_path_list) - )) - - try: - with open(xunit_path, 'r') as xunit_file: - xunit_report = xunit_file.read() - except OSError as e: - warn('Could not open xUnit XML {}: {}'.format(xunit_path, e)) - + db_path = os.path.join(artifact_path, 'VALUE_DB.pickle.xz') + try: + db = ValueDB.from_path(db_path) + except Exception as e: + warn('Could not read DB at {}: {}'.format(db_path, e)) + db = None else: - warn('No xUnit file found under: {}'.format(artifact_path)) + if self.prune_db: + # Prune the DB so we only keep the root values in it, or the + # exceptions + root_froz_val_uuids = { + froz_val.uuid + for froz_val in db.get_roots() + } + def prune_predicate(froz_val): + return not ( + # Keep the root values, usually ResultBundle's. We + # cannot compare the object themselves since they have + # been copied to the pruned DB. + froz_val.uuid in root_froz_val_uuids + or + # keep errors and values leading to them + froz_val.value is NoValue + ) + db = db.prune_by_predicate(prune_predicate) # Compress artifact directory if self.compress_artifact: @@ -1918,14 +1931,14 @@ class ExekallLISATestStep(ShellStep): path = artifact_local_path, )) else: - error('No upload service available, could not upload exkeall artifact.') + error('No upload service available, could not upload exekall artifact.') return LISATestStepResult( step = self, res_list = res_list, bisect_ret = bisect_ret, - xunit = xunit_report, results_path = artifact_path, + db = db, ) def report_results(self, step_res_seq, service_hub, @@ -1942,7 +1955,7 @@ class ExekallLISATestStep(ShellStep): ignore_non_issue = False, ignore_excep = [], dump_artifact_dirs = False, - xunit2json = False, + export_db = False, export_logs = False, download = True, upload_artifact = False @@ -1951,6 +1964,11 @@ class ExekallLISATestStep(ShellStep): the run() method. 
""" + from exekall.utils import get_name, NoValue + from exekall.engine import ValueDB + + from lisa.tests.base import CannotCreateError, Result + if verbose: show_basic = True show_rates = True @@ -1969,37 +1987,35 @@ class ExekallLISATestStep(ShellStep): ignored_testcase_set = set(ignore_testcase) considered_iteration_set = set(iterations) - # Parse the xUnit XML report from LISA to know the failing tests + # Read the ValueDB from exekall to know the failing tests testcase_map = collections.defaultdict(list) filtered_step_res_seq = list() + db_list = [] for step_res_item in step_res_seq: i_stack, step_res = step_res_item + bisect_ret = None # Ignore the iterations we are not interested in if considered_iteration_set and i_stack[0] not in considered_iteration_set: continue - # The xunit attribute of LISATestStepResult results contains the - # xUnit file content generated by LISA - xunit = step_res.xunit.strip() - if not xunit: - warn("Empty LISA's xUnit for {step_name} step, iteration {i}".format( + db = step_res.db + if not db: + warn("No exekall ValueDB for {step_name} step, iteration {i}".format( step_name = step_res.step.name, i = i_stack )) continue + else: + db_list.append(db) + def key(froz_val): + return froz_val.get_id(full_qual=True, with_tags=True) - try: - et_testsuites = ET.fromstring(step_res.xunit.strip()) - except ET.ParseError as e: - warn("Could not open LISA's xUnit report: {e}".format(e=e)) - continue + # Gather all result bundles + froz_val_list = sorted(db.get_roots(), key=key) - bisect_ret = None - for et_testsuite in et_testsuites.findall('testsuite'): - testsuite_name = et_testsuite.get('name') - for et_testcase in et_testsuite.findall('testcase'): - testcase_id = et_testcase.get('name') + for froz_val in froz_val_list: + testcase_id = froz_val.get_id(qual=False, with_tags=True) # Ignore tests we are not interested in if ( @@ -2016,31 +2032,65 @@ class ExekallLISATestStep(ShellStep): continue entry = { - 'testsuite': testsuite_name, 'testcase_id': testcase_id, 'i_stack': i_stack, 'results_path': step_res.results_path, } - is_ignored = True - # It is assumed that there will only be one result subtag - for et_subtag in et_testcase: - tag = et_subtag.tag - if tag in ( - 'passed', 'failure', 'skipped', 'undecided', 'error' - ): - entry['result'] = tag - - type_name = et_subtag.get('type') - if tag == 'error': - is_ignored = is_excep_ignored(type_name, ignored_except_set) - else: - is_ignored = False + is_ignored = False - short_msg = et_subtag.get('message') - msg = et_subtag.text - entry['details'] = (type_name, short_msg, msg) - break + try: + # We only show the 1st exception, others are hidden + excep_froz_val = list(froz_val.get_excep())[0] + except IndexError: + excep_froz_val = None + + if excep_froz_val: + excep = excep_froz_val.excep + if isinstance(excep, CannotCreateError): + result = 'skipped' + else: + result = 'error' + entry['result'] = result + + type_name = get_name(type(excep)) + is_ignored |= is_excep_ignored( + type_name, + ignored_except_set, + ) + short_msg = str(excep) + msg = excep_froz_val.excep_tb + + entry['details'] = (type_name, short_msg, msg) + else: + val = froz_val.value + result_map = { + Result.PASSED: 'passed', + Result.FAILED: 'failure', + Result.UNDECIDED: 'undecided', + } + # If that is a ResultBundle, use its result to get + # most accurate info, otherwise just assume it is a + # bool-ish value + try: + result = val.result + msg = '\n'.join( + '{}: {}'.format(metric, value) + for metric, value in val.metrics.items() + ) + except 
AttributeError: + result = Result.PASSED if val else Result.FAILED + msg = str(val) + + # If the result is not something known, assume undecided + result = result_map.get(result, 'undecided') + + entry['result'] = result + type_name = get_name(type(val)) + short_msg = result + entry['details'] = (type_name, short_msg, msg) + + entry['froz_val'] = froz_val # Ignored testcases will not contribute to the number of # iterations @@ -2058,7 +2108,7 @@ class ExekallLISATestStep(ShellStep): if not any_entries: bisect_ret = BisectRet.NA - # Update the LISATestStepResult bisect result based on the xUnit + # Update the LISATestStepResult bisect result based on the DB # content. step_res.bisect_ret = bisect_ret @@ -2127,12 +2177,6 @@ class ExekallLISATestStep(ShellStep): os.unlink(archive_dst) os.symlink(archive_path, archive_dst) - # Save the xUnit file as well - if step_res.xunit: - xunit_log_path = os.path.join(log_dir, 'xunit.xml') - with open(xunit_log_path, 'w') as f: - f.write(step_res.xunit) - out('') # Always execute that for the potential side effects like exporting the @@ -2204,6 +2248,7 @@ class ExekallLISATestStep(ShellStep): i_stack = entry['i_stack'] results_path = '\n' + entry['results_path'] if show_artifact_dirs else '' exception_name, short_msg, msg = entry['details'] + uuid_ = entry['froz_val'].uuid if show_details == 'msg': msg = '' @@ -2216,7 +2261,9 @@ class ExekallLISATestStep(ShellStep): msg += ' ' table_out( - ' #{i_stack: <2}) {short_msg} ({exception_name}){results_path}{msg}'.format(**locals()).replace('\n', '\n\t') + ' #{i_stack: <2}) UUID={uuid_} ({exception_name}) {short_msg}{results_path}{msg}'.format( + **locals() + ).replace('\n', '\n\t') ) iterations_summary = stats['iterations_summary'] @@ -2245,7 +2292,6 @@ class ExekallLISATestStep(ShellStep): out() out(dist_out) - counts = { issue: sum( # If there is a non-zero count for that issue for that test, we @@ -2253,17 +2299,13 @@ class ExekallLISATestStep(ShellStep): bool(stats['counters'][issue]) for stats in testcase_stats.values() ) - for issue in ('error', 'failure', 'undecided', 'skipped') + for issue in ('error', 'failure', 'undecided', 'skipped', 'total') } + nr_tests = len(testcase_map) + assert counts['total'] == nr_tests + # Only account for tests that only passed and had no other issues - counts['passed'] = sum( - all( - not count - for issue, count in stats['counters'].items() - if issue != 'passed' - ) - for stats in testcase_stats.values() - ) + counts['passed'] = nr_tests - (sum(counts.values()) - nr_tests) out( 'Crashed: {counts[error]}/{total}, ' @@ -2272,13 +2314,21 @@ class ExekallLISATestStep(ShellStep): 'Skipped: {counts[skipped]}/{total}, ' 'Passed: {counts[passed]}/{total}'.format( counts=counts, - total=len(testcase_map), + total=nr_tests, ) ) - # Write out the digest in JSON format so another tool can exploit it - if xunit2json: - update_json(xunit2json, testcase_stats) + # Write-out a merged DB + if export_db: + try: + existing_db = ValueDB.from_path(export_db) + except FileNotFoundError: + pass + else: + db_list.append(existing_db) + + merged_db = ValueDB.merge(db_list) + merged_db.to_path(export_db) return out @@ -2356,7 +2406,6 @@ class LISATestStep(ShellStep): env = { 'LISA_RESULTS_BASE': results_path, - # TODO: remove that once LISA has switched to the new behavior 'LISA_RESULTS_DIR': results_path } info('Setting LISA_RESULTS_BASE = {results_path} ...'.format(**locals())) @@ -2430,8 +2479,8 @@ class LISATestStep(ShellStep): step = self, res_list = res_list, bisect_ret = bisect_ret, - xunit 
= xunit_report,
             results_path = results_path,
+            xunit = xunit_report,
         )
 
 
@@ -4020,7 +4069,7 @@ class Report(Serializable):
             report=self
         )
         with open_f(temp_path, 'wb') as f:
-            pickle.dump(pickle_data, f)
+            pickle.dump(pickle_data, f, protocol=4)
 
         # Rename the file once we know for sure that writing to the temporary
         # report completed with success
-- 
GitLab

From 2fdb84a46f955b06a2c21b3c06e3ecd6c050acde Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Tue, 22 Jan 2019 15:41:49 +0000
Subject: [PATCH 11/26] lisa: Add regression.py

This new module contains regression-tracking helpers, along with a CLI
tool that can compare two exekall ValueDBs, each containing multiple
values for the same testcases, and check whether there was any
significant regression.
---
 lisa/regression.py | 230 +++++++++++++++++++++++++++++++++++++++++++++
 setup.py           |   1 +
 2 files changed, 231 insertions(+)
 create mode 100644 lisa/regression.py

diff --git a/lisa/regression.py b/lisa/regression.py
new file mode 100644
index 000000000..5df22f1bc
--- /dev/null
+++ b/lisa/regression.py
@@ -0,0 +1,230 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# Copyright (C) 2019, Arm Limited and contributors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import re
+from collections import OrderedDict, namedtuple
+
+from lisa.utils import groupby, memoized
+from lisa.tests.base import Result, ResultBundle
+
+import scipy.stats
+
+ResultCount = namedtuple('ResultCount', ('passed', 'failed'))
+
+class RegressionResult:
+    """
+    Compute failure-rate regression between old and new series.
+
+    The regression is checked using Fisher's exact test.
+
+    :param testcase_id: ID of the testcase, used for pretty-printing
+    :type testcase_id: str
+
+    :param old_count: number of times the test passed and failed in the old series
+    :type old_count: ResultCount
+
+    :param new_count: number of times the test passed and failed in the new series
+    :type new_count: ResultCount
+
+    :param alpha: Alpha risk when carrying out the statistical test
+    :type alpha: float
+
+
+    """
+    def __init__(self, testcase_id,
+            old_count, new_count,
+            alpha=None,
+        ):
+        self.old_count = old_count
+        self.new_count = new_count
+
+        self.testcase_id = testcase_id
+        self.alpha = alpha if alpha is not None else 0.05
+
+    @classmethod
+    def from_result_list(cls, testcase_id, old_list, new_list, alpha=None):
+        """
+        Build a :class:`RegressionResult` from two lists of
+        :class:`lisa.tests.kernel.test_bundle.Result`, or objects that can be
+        converted to `bool`.
+
+        .. note:: Only ``FAILED`` and ``PASSED`` results are taken into account,
+          other results are ignored.
+
+        :param testcase_id: ID of the testcase
+        :type testcase_id: str
+
+        :param old_list: old series
+        :type old_list: list(lisa.tests.kernel.test_bundle.Result)
+
+        :param new_list: new series
+        :type new_list: list(lisa.tests.kernel.test_bundle.Result)
+
+        :param alpha: Alpha risk of the statistical test
+        :type alpha: float
+        """
+        def coerce_to_bool(x, res):
+            if isinstance(x, ResultBundle):
+                return x.result is res
+            # handle other types as well, as long as they can be
+            # converted to bool
+            else:
+                if res is Result.FAILED:
+                    return not bool(x)
+                elif res is Result.PASSED:
+                    return bool(x)
+
+        def count(seq, res):
+            return sum(
+                coerce_to_bool(x, res)
+                for x in seq
+            )
+
+        # Ignore errors and skipped tests
+        old_count = ResultCount(
+            failed=count(old_list, Result.FAILED),
+            passed=count(old_list, Result.PASSED),
+        )
+
+        new_count = ResultCount(
+            failed=count(new_list, Result.FAILED),
+            passed=count(new_list, Result.PASSED),
+        )
+
+        return cls(
+            testcase_id=testcase_id,
+            old_count=old_count,
+            new_count=new_count,
+            alpha=alpha,
+        )
+
+    @property
+    def sample_size(self):
+        """
+        Tuple of sample sizes for old and new series.
+
+        """
+        return (
+            (self.old_count.passed + self.old_count.failed),
+            (self.new_count.passed + self.new_count.failed),
+        )
+
+    @property
+    def failure_pc(self):
+        """
+        Tuple of failure rate in percent for old and new series.
+        """
+        def div(x, y):
+            try:
+                return x/y
+            except ZeroDivisionError:
+                return float('Inf')
+        failure_new_pc = 100 * div(self.new_count.failed, (self.new_count.failed + self.new_count.passed))
+        failure_old_pc = 100 * div(self.old_count.failed, (self.old_count.failed + self.old_count.passed))
+
+        return (failure_old_pc, failure_new_pc)
+
+    @property
+    def failure_delta_pc(self):
+        """
+        Delta between old and new failure rate in percent.
+        """
+        failure_old_pc, failure_new_pc = self.failure_pc
+        return failure_new_pc - failure_old_pc
+
+    @property
+    def significant(self):
+        """
+        True if there is a significant difference in failure rate, False
+        otherwise.
+        """
+        return self.p_val <= self.alpha
+
+    @property
+    def p_val(self):
+        """
+        P-value of the statistical test.
+        """
+        return self.get_p_val()
+
+    @memoized
+    def get_p_val(self, alternative='two-sided'):
+        """
+        Compute the p-value of the statistical test, with the given alternative
+        hypothesis.
+        """
+        # Apply the Fisher exact test to all test failures.
+        odds_ratio, p_val = scipy.stats.fisher_exact(
+            [
+                # Ignore errors and skipped tests
+                [self.old_count.failed, self.old_count.passed],
+                [self.new_count.failed, self.new_count.passed],
+            ],
+            alternative = alternative,
+        )
+        return p_val
+
+def compute_regressions(old_list, new_list, remove_tags=[], **kwargs):
+    """
+    Compute a list of :class:`RegressionResult` out of two lists of
+    :class:`exekall.engine.FrozenExprVal`.
+
+    The tests are first grouped by their ID, and then a
+    :class:`RegressionResult` is computed for each of these IDs.
+
+    :param old_list: old series of :class:`exekall.engine.FrozenExprVal`
+    :type old_list: list(exekall.engine.FrozenExprVal)
+
+    :param new_list: new series of :class:`exekall.engine.FrozenExprVal`
+    :type new_list: list(exekall.engine.FrozenExprVal)
+
+    :param remove_tags: remove the given list of tags from the IDs before
+        computing the regression. That allows computing regressions with a
+        different "board" tag for example.
+    :type remove_tags: list(str)
+
+    :param kwargs: extra :meth:`RegressionResult.from_result_list` parameters
+    """
+    def get_id(froz_val):
+        id_ = froz_val.get_id(qual=False, with_tags=True)
+
+        # Remove tags, so that more tests will share the same ID. This allows
+        # cross-board comparison for example.
+        for tag in remove_tags:
+            id_ = re.sub(r'\[{}=.*?\]'.format(tag), '', id_)
+
+        return id_
+
+    def group_by_testcase(froz_val_list):
+        return OrderedDict(
+            (testcase_id, [froz_val.value for froz_val in froz_val_group])
+            for testcase_id, froz_val_group in groupby(froz_val_list, key=get_id)
+        )
+
+    old_testcases = group_by_testcase(old_list)
+    new_testcases = group_by_testcase(new_list)
+
+    return [
+        RegressionResult.from_result_list(
+            testcase_id=testcase_id,
+            old_list=old_testcases[testcase_id],
+            new_list=new_testcases[testcase_id],
+            **kwargs,
+        )
+        for testcase_id in sorted(old_testcases.keys() & new_testcases.keys())
+    ]
diff --git a/setup.py b/setup.py
index f24ca6067..23f87da30 100755
--- a/setup.py
+++ b/setup.py
@@ -92,6 +92,7 @@ setup(
         "matplotlib >= 1.4.2",
         "pandas >= 0.23.0",
         "numpy",
+        "scipy",
         "ruamel.yaml >= 0.15.81",
 
         # Depdendencies that are shipped as part of the LISA repo as
-- 
GitLab

From ec63e99d84458318dd8f3b4c423d89f9b798d900 Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Wed, 23 Jan 2019 18:07:59 +0000
Subject: [PATCH 12/26] exekall: add compare subcommand

---
 lisa/exekall_customize.py              | 57 +++++++++++++++++++++++++-
 tools/exekall/exekall/customization.py |  9 +++-
 tools/exekall/exekall/engine.py        | 17 ++++++--
 tools/exekall/exekall/main.py          | 57 ++++++++++++++++++++++++--
 4 files changed, 129 insertions(+), 11 deletions(-)

diff --git a/lisa/exekall_customize.py b/lisa/exekall_customize.py
index 874fce276..682aec83d 100644
--- a/lisa/exekall_customize.py
+++ b/lisa/exekall_customize.py
@@ -28,8 +28,9 @@ from lisa.env import TestEnv, TargetConf
 from lisa.platforms.platinfo import PlatformInfo
 from lisa.utils import HideExekallID, Loggable, ArtifactPath, get_subclasses, groupby, Serializable
 from lisa.conf import MultiSrcConf
-from lisa.tests.base import TestBundle
+from lisa.tests.base import TestBundle, ResultBundle
 from lisa.tests.scheduler.load_tracking import FreqInvarianceItem
+from lisa.regression import compute_regressions
 
 from exekall.utils import get_name
 from exekall.engine import ExprData, Consumer, PrebuiltOperator
@@ -128,7 +129,7 @@ class LISAAdaptor(AdaptorBase):
         return hidden_op_set
 
     @staticmethod
-    def register_cli_param(parser):
+    def register_run_param(parser):
         parser.add_argument('--conf', action='append',
             default=[],
            help="LISA configuration file. If multiple configurations of a given type are found, they are merged (last one can override keys in previous ones)")
@@ -138,6 +139,58 @@ class LISAAdaptor(AdaptorBase):
             default=[],
             help="Serialized object to inject when building expressions")
 
+    @staticmethod
+    def register_compare_param(parser):
+        parser.add_argument('--alpha', type=float,
+            default=5,
+            help="""Alpha risk for Fisher exact test in percents.""")
+
+        parser.add_argument('--non-significant', action='store_true',
+            help="""Also show non-significant changes of failure rate.""")
+
+        parser.add_argument('--remove-tag', action='append',
+            default=[],
+            help="""Remove the given tags in the testcase IDs before comparison.""")
+
+    def compare_db_list(self, db_list):
+        alpha = self.args.alpha / 100
+        show_non_significant = self.args.non_significant
+
+        result_list_old, result_list_new = [
+            db.get_roots()
+            for db in db_list
+        ]
+
+        regr_list = compute_regressions(
+            result_list_old,
+            result_list_new,
+            remove_tags=self.args.remove_tag,
+            alpha=alpha,
+        )
+
+        print('testcase failure rate changes with alpha={}\n'.format(alpha))
+
+        id_len = max(len(regr.testcase_id) for regr in regr_list)
+
+        header = '{id:<{id_len}} old% new% delta% pvalue{regr_column}'.format(
+            id='testcase',
+            id_len=id_len,
+            regr_column=' changed' if show_non_significant else ''
+        )
+        print(header + '\n' + '-' * len(header))
+        for regr in regr_list:
+            if regr.significant or show_non_significant:
+                old_pc, new_pc = regr.failure_pc
+                print('{id:<{id_len}} {old_pc:>5.1f}% {new_pc:>5.1f}% {delta_pc:>5.1f}% {pval:.2e} {has_regr}'.format(
+                    id=regr.testcase_id,
+                    old_pc=old_pc,
+                    new_pc=new_pc,
+                    delta_pc=regr.failure_delta_pc,
+                    pval=regr.p_val,
+                    id_len=id_len,
+                    has_regr='*' if regr.significant and show_non_significant else '',
+                ))
+
     @staticmethod
     def get_default_type_goal_pattern_set():
         return {'*.ResultBundle'}
diff --git a/tools/exekall/exekall/customization.py b/tools/exekall/exekall/customization.py
index 60d55cae9..ec8302bfb 100644
--- a/tools/exekall/exekall/customization.py
+++ b/tools/exekall/exekall/customization.py
@@ -60,7 +60,14 @@ class AdaptorBase:
         return self.hidden_op_set
 
     @staticmethod
-    def register_cli_param(parser):
+    def register_run_param(parser):
+        pass
+
+    @staticmethod
+    def register_compare_param(parser):
+        pass
+
+    def compare_db_list(self, db_list):
         pass
 
     @staticmethod
diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py
index e2c67df06..a59867400 100644
--- a/tools/exekall/exekall/engine.py
+++ b/tools/exekall/exekall/engine.py
@@ -54,10 +54,11 @@ class ValueDB:
     # dumping speed.
    PICKLE_PROTOCOL = 4
 
-    def __init__(self, froz_val_seq_list):
+    def __init__(self, froz_val_seq_list, adaptor_cls=None):
         # Avoid storing duplicate FrozenExprVal sharing the same value/excep
         # UUID
         self.froz_val_seq_list = self._dedup_froz_val_seq_list(froz_val_seq_list)
+        self.adaptor_cls = adaptor_cls
 
     @classmethod
     def _dedup_froz_val_seq_list(cls, froz_val_seq_list):
@@ -110,12 +111,20 @@ class ValueDB:
         return cls._froz_val_dfs(froz_val_seq_list, rewrite_graph)
 
     @classmethod
-    def merge(cls, db_seq):
+    def merge(cls, db_list):
+        adaptor_cls_set = {
+            db.adaptor_cls
+            for db in db_list
+        }
+        if len(adaptor_cls_set) != 1:
+            raise ValueError('Cannot merge ValueDB with different adaptor classes: {}'.format(adaptor_cls_set))
+        adaptor_cls = utils.take_first(adaptor_cls_set)
+
         froz_val_seq_list = list(itertools.chain(*(
             db.froz_val_seq_list
-            for db in db_seq
+            for db in db_list
         )))
-        return cls(froz_val_seq_list)
+        return cls(froz_val_seq_list, adaptor_cls=adaptor_cls)
 
     @classmethod
     def from_path(cls, path, relative_to=None):
diff --git a/tools/exekall/exekall/main.py b/tools/exekall/exekall/main.py
index 0a500fc45..c92b4914f 100755
--- a/tools/exekall/exekall/main.py
+++ b/tools/exekall/exekall/main.py
@@ -339,6 +339,16 @@ should be treated as read-only.
     merge_parser.add_argument('--copy', action='store_true',
         help="""Force copying files, instead of using hardlinks.""")
 
+
+    compare_parser = subparsers.add_parser('compare',
+        description="""
+Compare two DBs produced by exekall run.
+    """,
+        formatter_class=argparse.RawTextHelpFormatter)
+
+    compare_parser.add_argument('db', nargs=2,
+        help="""DBs created using exekall run to compare.""")
+
     # Avoid showing help message on the incomplete parser. Instead, we carry on
     # and the help will be displayed after the parser customization of run
     # subcommand has a chance to take place.
@@ -369,7 +379,7 @@ should be treated as read-only.
 
     # Some subcommands need not parser customization, in which case we more
     # strictly parse the command line
-    if args.subcommand not in ['run']:
+    if args.subcommand not in ('run', 'compare'):
         parser.parse_args(argv)
 
     if args.subcommand == 'run':
@@ -383,6 +393,43 @@ should be treated as read-only.
             use_hardlink=(not args.copy),
         )
 
+    elif args.subcommand == 'compare':
+        return do_compare(
+            parser=parser,
+            compare_parser=compare_parser,
+            argv=argv,
+            db_path_list=args.db,
+        )
+
+def do_compare(parser, compare_parser, argv, db_path_list):
+    assert len(db_path_list) == 2
+    db_list = [
+        engine.ValueDB.from_path(path)
+        for path in db_path_list
+    ]
+
+    adaptor_cls_set = {
+        db.adaptor_cls
+        for db in db_list
+    }
+    if len(adaptor_cls_set) != 1:
+        raise ValueError('Cannot compare DBs that were built using a different adaptor: {}'.format(adaptor_cls_set))
+    adaptor_cls = utils.take_first(adaptor_cls_set)
+
+    # Add all the CLI arguments of the adaptor before reparsing the
+    # command line.
+    adaptor_cls.register_compare_param(compare_parser)
+
+    # Reparse the command line after the adaptor had a chance to add its own
+    # arguments.
+    args = parser.parse_args(argv)
+
+    # Create the adaptor with the args, so it can use it to implement
+    # comparison
+    adaptor = adaptor_cls(args)
+
+    return adaptor.compare_db_list(db_list)
+
 def do_merge(artifact_dirs, output_dir, use_hardlink=True, output_exist=False):
     output_dir = pathlib.Path(output_dir)
 
@@ -490,7 +537,7 @@ def do_run(args, parser, run_parser, argv):
         raise RuntimeError('Adaptor "{}" cannot be found'.format(adaptor_name))
     # Add all the CLI arguments of the adaptor before reparsing the
    # command line.
-    adaptor_cls.register_cli_param(run_parser)
+    adaptor_cls.register_run_param(run_parser)
 
     # Reparse the command line after the adaptor had a chance to add its own
     # arguments.
@@ -717,6 +764,7 @@ def do_run(args, parser, run_parser, argv):
             testsession_uuid=testsession_uuid,
             hidden_callable_set=hidden_callable_set,
             only_template_scripts=only_template_scripts,
+            adaptor_cls=adaptor_cls,
             verbose=verbose,
         )
 
@@ -732,7 +780,7 @@ def do_run(args, parser, run_parser, argv):
     return exec_ret_code
 
 def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid,
-    hidden_callable_set, only_template_scripts, verbose):
+    hidden_callable_set, only_template_scripts, adaptor_cls, verbose):
 
     if not only_template_scripts:
         with (artifact_dir/'UUID').open('wt') as f:
@@ -938,7 +986,8 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid,
     db = engine.ValueDB(
         engine.FrozenExprValSeq.from_expr_list(
             expr_list, hidden_callable_set=hidden_callable_set
-        )
+        ),
+        adaptor_cls=adaptor_cls,
     )
 
     db_path = artifact_dir/DB_FILENAME
-- 
GitLab

From f2d696771f6f99ec84f2e811b961055d6c614d5c Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Fri, 25 Jan 2019 10:45:10 +0000
Subject: [PATCH 13/26] bisector: speedup report reloading

Disable garbage collection while parsing YAML or loading the pickle
report. This speeds up the process.

Also deduplicate strings when dumping the report. That gives a 40%
speedup when reloading, and also makes the file smaller.
---
 tools/bisector/bisector/bisector.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/bisector/bisector/bisector.py b/tools/bisector/bisector/bisector.py
index be0b54852..f63f4988a 100755
--- a/tools/bisector/bisector/bisector.py
+++ b/tools/bisector/bisector/bisector.py
@@ -26,6 +26,7 @@ import enum
 import fcntl
 import fnmatch
 import functools
+import gc
 import glob
 import gzip
 import hashlib
@@ -3953,8 +3954,13 @@ def init_yaml(yaml, relative_root):
     yaml.Representer.add_representer(collections.OrderedDict, map_representer)
     yaml.Constructor.add_constructor(yaml.resolver.DEFAULT_MAPPING_TAG, map_constructor)
 
-    # Use block style for multiline strings
+    # Since strings are immutable, we can memoize the output to deduplicate
+    # strings. This will make the dumped strings live forever, but that should
+    # not be a big issue since everything that ends up in a YAML document is
+    # also living in memory anyway in our use case.
+    @functools.lru_cache(maxsize=None, typed=True)
     def str_presenter(dumper, data):
+        # Use block style for multiline strings
         style = '|' if '\n' in data else None
         return dumper.represent_scalar('tag:yaml.org,2002:str', data, style=style)
 
@@ -4009,6 +4015,25 @@ def check_report_path(path, probe_file):
     is_yaml = not (path.endswith('.pickle') or path.endswith('.pickle.gz'))
     return (open_f, is_yaml)
 
+def disable_gc(f):
+    """
+    Decorator to disable garbage collection during the execution of the
+    decorated function.
+
+    This can speed up code that creates a lot of objects in a short amount of
+    time.
+    """
+    @functools.wraps(f)
+    def wrapper(*args, **kwargs):
+        gc.disable()
+        try:
+            return f(*args, **kwargs)
+        # re-enable the garbage collector in all cases
+        finally:
+            gc.enable()
+
+    return wrapper
+
 class Report(Serializable):
     """
     Report body containg the result of the top level :class:`MacroStep` .
@@ -4090,6 +4115,7 @@ class Report(Serializable):
         return (path, url)
 
     @classmethod
+    @disable_gc
     def _do_load(cls, path, steps_path):
         open_f, is_yaml = check_report_path(path, probe_file=True)
 
-- 
GitLab

From 8d94cac28792deb69fae048381405f44d259599b Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Mon, 28 Jan 2019 18:53:15 +0000
Subject: [PATCH 14/26] exekall: make ValueDB representation more compact

---
 tools/exekall/exekall/engine.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py
index a59867400..6c5e817a4 100644
--- a/tools/exekall/exekall/engine.py
+++ b/tools/exekall/exekall/engine.py
@@ -2232,6 +2232,10 @@ class FrozenExprVal(ExprValBase):
         return froz_val
 
     @staticmethod
+    # Since tuples are immutable, reuse the same tuple by memoizing the
+    # function. That allows a more compact serialized representation in both
+    # YAML and Pickle.
+    @utils.once
     def _make_id_key(**kwargs):
         return tuple(sorted(kwargs.items()))
 
-- 
GitLab

From ba47389f11cccb60dbd40f9bd28cf61beacd99f7 Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Mon, 28 Jan 2019 19:10:33 +0000
Subject: [PATCH 15/26] bisector: use more compact YAML representation

Reduce indentation to the bare minimum.
---
 tools/bisector/bisector/bisector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/bisector/bisector/bisector.py b/tools/bisector/bisector/bisector.py
index f63f4988a..6b6a6c5aa 100755
--- a/tools/bisector/bisector/bisector.py
+++ b/tools/bisector/bisector/bisector.py
@@ -3925,8 +3925,8 @@ def init_yaml(yaml, relative_root):
     # Without that, escape sequences are used to represent unicode characters in
     # plain ascii.
     yaml.allow_unicode = True
-    yaml.default_flow_style = False
-    yaml.indent = 4
+    yaml.default_flow_style = True
+    yaml.indent(mapping=1, sequence=1, offset=0)
 
     # Allow !include tag to include other files. Relative paths are relative
     # to the folder of the root yaml file. Absolute paths are used as-is.
-- 
GitLab

From 8d5b82146d5672ebe9e83acce1f8bf5c56813e6c Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Tue, 29 Jan 2019 11:07:29 +0000
Subject: [PATCH 16/26] exekall: cleanup ValueDB reloading

Now that ValueDB records its adaptor class, we can use that to
automatically apply the path fixups when it is reloaded from a file.
--- lisa/exekall_customize.py | 11 +++---- tools/exekall/exekall/customization.py | 6 ++-- tools/exekall/exekall/engine.py | 45 ++++++++++++++++++-------- tools/exekall/exekall/main.py | 8 ++--- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/lisa/exekall_customize.py b/lisa/exekall_customize.py index 682aec83d..99896ac36 100644 --- a/lisa/exekall_customize.py +++ b/lisa/exekall_customize.py @@ -196,12 +196,14 @@ class LISAAdaptor(AdaptorBase): return {'*.ResultBundle'} @classmethod - def load_db(cls, db_path, *args, **kwargs): - db = super().load_db(db_path, *args, **kwargs) + def reload_db(cls, db, path=None): + # If path is not known, we cannot do anything here + if not path: + return db # This will relocate ArtifactPath instances to the new absolute path of # the results folder, in case it has been moved to another place - artifact_dir = Path(db_path).parent.resolve() + artifact_dir = Path(path).parent.resolve() # Relocate ArtifactPath embeded in objects so they will always # contain an absolute path that adapts to the local filesystem @@ -292,6 +294,3 @@ class LISAAdaptor(AdaptorBase): tags = {k: v for k, v in tags.items() if v is not None} return tags - -# Expose it as a module-level name -load_db = LISAAdaptor.load_db diff --git a/tools/exekall/exekall/customization.py b/tools/exekall/exekall/customization.py index ec8302bfb..abf05ed4c 100644 --- a/tools/exekall/exekall/customization.py +++ b/tools/exekall/exekall/customization.py @@ -77,9 +77,9 @@ class AdaptorBase: def resolve_cls_name(self, goal): return utils.get_class_from_name(goal) - @staticmethod - def load_db(*args, **kwargs): - return ValueDB.from_path(*args, **kwargs) + @classmethod + def reload_db(cls, db, path=None): + return db def finalize_expr(self, expr): pass diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py index 6c5e817a4..49e37ef0f 100644 --- a/tools/exekall/exekall/engine.py +++ b/tools/exekall/exekall/engine.py @@ -142,6 +142,31 @@ class ValueDB: db = pickle.load(f) assert isinstance(db, cls) + # Apply some post-processing on the DB with a known path + cls._call_adaptor_reload(db, path=path) + + return db + + @classmethod + def _reload_serialized(cls, dct): + db = cls.__new__(cls) + db.__dict__ = dct + + # Apply some post-processing on the DB that was just reloaded, with no + # path since we don't even know if that method was invoked on something + # serialized in a file. 
+ cls._call_adaptor_reload(db, path=None) + + return db + + def __reduce_ex__(self, protocol): + return (self._reload_serialized, (self.__dict__,)) + + @staticmethod + def _call_adaptor_reload(db, path): + adaptor_cls = db.adaptor_cls + if adaptor_cls: + db = adaptor_cls.reload_db(db, path=path) return db def to_path(self, path, optimize=True): @@ -612,12 +637,15 @@ class ExpressionBase: return self.get_all_script([self], *args, **kwargs) @classmethod - def get_all_script(cls, expr_list, prefix='value', db_path='VALUE_DB.pickle.xz', db_relative_to=None, db_loader=None, db=None): + def get_all_script(cls, expr_list, prefix='value', db_path='VALUE_DB.pickle.xz', db_relative_to=None, db=None, adaptor_cls=None): assert expr_list if db is None: froz_val_seq_list = FrozenExprValSeq.from_expr_list(expr_list) - script_db = ScriptValueDB(ValueDB(froz_val_seq_list)) + script_db = ScriptValueDB(ValueDB( + froz_val_seq_list, + adaptor_cls=adaptor_cls, + )) else: script_db = ScriptValueDB(db) @@ -670,15 +698,6 @@ class ExpressionBase: result_name_map[expr] = result_name - # Get the name of the customized db_loader - if db_loader is None: - db_loader_name = '{cls_name}.from_path'.format( - cls_name=utils.get_name(ValueDB, full_qual=True), - ) - else: - module_name_set.add(inspect.getmodule(db_loader).__name__) - db_loader_name = utils.get_name(db_loader, full_qual=True) - # Add all the imports header = ( '#! /usr/bin/env python3\n\n' + @@ -712,9 +731,9 @@ class ExpressionBase: else: db_relative_to = '' - header += '{db} = {db_loader_name}({path}{db_relative_to})\n'.format( + header += '{db} = {db_loader}({path}{db_relative_to})\n'.format( db = script_db.var_name, - db_loader_name = db_loader_name, + db_loader = utils.get_name(ValueDB.from_path, full_qual=True), path = repr(str(db_path)), db_relative_to = db_relative_to ) diff --git a/tools/exekall/exekall/main.py b/tools/exekall/exekall/main.py index c92b4914f..daa466438 100755 --- a/tools/exekall/exekall/main.py +++ b/tools/exekall/exekall/main.py @@ -624,7 +624,7 @@ def do_run(args, parser, run_parser, argv): if load_db_path_list: db_list = [] for db_path in load_db_path_list: - db = adaptor.load_db(db_path) + db = engine.ValueDB.from_path(db_path) op_set.update( load_from_db(db, adaptor, non_reusable_type_set, load_db_pattern_list, load_db_uuid_list, load_db_uuid_args @@ -786,8 +786,6 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, with (artifact_dir/'UUID').open('wt') as f: f.write(testsession_uuid+'\n') - db_loader = adaptor.load_db - out('\nArtifacts dir: {}\n'.format(artifact_dir)) # Get a list of ComputableExpression in order to execute them @@ -846,7 +844,6 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, prefix = 'testcase', db_path = os.path.join('..', DB_FILENAME), db_relative_to = '__file__', - db_loader=db_loader )[1]+'\n', ) @@ -964,7 +961,6 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, prefix = 'testcase', db_path = os.path.join('..', '..', DB_FILENAME), db_relative_to = '__file__', - db_loader=db_loader )[1]+'\n', ) @@ -1010,7 +1006,7 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, db_path=db_path.relative_to(artifact_dir), db_relative_to='__file__', db=db, - db_loader=db_loader, + adaptor_cls=adaptor_cls, ) with script_path.open('wt', encoding='utf-8') as f: -- GitLab From 6c60c401db30b66dad991bec43a5ce21ef0f505c Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Tue, 29 Jan 2019 11:12:12 +0000 Subject: [PATCH 17/26] exekall: 
remove 'return' key in ExprValBase

This allows doing a DFS, treating the keys as children, without
entering a loop.
---
 tools/exekall/exekall/engine.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py
index 49e37ef0f..08b3157f2 100644
--- a/tools/exekall/exekall/engine.py
+++ b/tools/exekall/exekall/engine.py
@@ -2164,17 +2164,13 @@ class ExprValBase(collections.abc.Mapping):
         return id(self)

     def __getitem__(self, k):
-        if k == 'return':
-            return self.value
-        else:
-            return self.param_map[k]
+        return self.param_map[k]

     def __len__(self):
-        # account for 'return'
-        return len(self.param_map) + 1
+        return len(self.param_map)

     def __iter__(self):
-        return itertools.chain(self.param_map.keys(), ['return'])
+        return iter(self.param_map)

 class FrozenExprVal(ExprValBase):
     def __init__(self,
--
GitLab


From 5d1b3b16365f70672f3b6c762150045cb51df4b3 Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Tue, 29 Jan 2019 12:15:51 +0000
Subject: [PATCH 18/26] bisector: cleanup compression detection

---
 tools/bisector/bisector/bisector.py | 43 ++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/tools/bisector/bisector/bisector.py b/tools/bisector/bisector/bisector.py
index 6b6a6c5aa..a2713c893 100755
--- a/tools/bisector/bisector/bisector.py
+++ b/tools/bisector/bisector/bisector.py
@@ -35,6 +35,7 @@ import inspect
 import itertools
 import json
 import logging
+import lzma
 import math
 import multiprocessing
 import mimetypes
@@ -42,6 +43,7 @@ import numbers
 import os
 import os.path
 import pickle
+import pathlib
 import queue
 import re
 import select
@@ -3993,26 +3995,35 @@ class ReportPreamble(Serializable):
         self.preamble_version = 0

 def check_report_path(path, probe_file):
-    if probe_file:
-        # Try to open as gzip
+    def probe_open_f(open_f, path):
         try:
-            with gzip.open(path) as trial_f:
-                trial_f.read(1)
-        except OSError as e:
-            # If we get a gzip-specific error, we assume a plain file
-            if 'gzip' in str(e):
-                open_f = open
-            else:
-                raise
+            with open_f(path) as f:
+                f.read(1)
+        except Exception:
+            return False
         else:
-            open_f = gzip.open
+            return True
+
+    # Default to uncompressed file
+    open_f = open
+    mime_map = {
+        'gzip': gzip.open,
+        'xz': lzma.open,
+    }
+
+    if probe_file:
+        for f in mime_map.values():
+            if probe_open_f(f, path):
+                open_f = f
+                break
     else:
-        if mimetypes.guess_type(path)[1] == 'gzip':
-            open_f = gzip.open
-        else:
-            open_f = open
+        guessed_mime = mimetypes.guess_type(path)[1]
+        open_f = mime_map.get(guessed_mime, open_f)

-    is_yaml = not (path.endswith('.pickle') or path.endswith('.pickle.gz'))
+    path = pathlib.Path(path)
+    compo = path.name.split('.')
+    # By default, assume YAML unless pickle is explicitly present
+    is_yaml = 'pickle' not in compo
     return (open_f, is_yaml)

 def disable_gc(f):
--
GitLab


From 7da9b4c92df386285b361ff233dac405bb75e524 Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Tue, 29 Jan 2019 12:16:10 +0000
Subject: [PATCH 19/26] bisector: remove !include support

It was not really needed and the implementation was somewhat flaky.
---
 tools/bisector/bisector/bisector.py | 25 +++++--------------------
 1 file changed, 5 insertions(+), 20 deletions(-)

diff --git a/tools/bisector/bisector/bisector.py b/tools/bisector/bisector/bisector.py
index a2713c893..88905304f 100755
--- a/tools/bisector/bisector/bisector.py
+++ b/tools/bisector/bisector/bisector.py
@@ -3922,31 +3922,16 @@ def do_run(slave_manager, iteration_n, stat_test, 
steps_filter=None, def init_yaml(yaml, relative_root): """ Initialize pyyaml to allow transparent loading and dumping of - OrderedDict, as well as an !include tag to include another file. + OrderedDict. """ - # Without that, escape sequences are used to represent unicode characters in - # plain ascii. + # Without allow_unicode, escape sequences are used to represent unicode + # characters in plain ASCII yaml.allow_unicode = True yaml.default_flow_style = True yaml.indent(mapping=1, sequence=1, offset=0) - # Allow !include tag to include other files. Relative paths are relative - # to the folder of the root yaml file. Absolute paths are used as-is. - # Environments variables are expanded in the path. - def include_constructor(loader, node): - filename = loader.construct_scalar(node) - filename = os.path.expandvars(filename) - if os.path.isabs(filename): - path = filename - else: - path = os.path.join(relative_root, filename) - - with open(path, 'r') as f: - return yaml.load(f) - - yaml.Constructor.add_constructor('!include', include_constructor) - - # Use OrderedDict as default mapping objects + # Dump OrderedDict as regular dictionaries, since we will reload map as + # OrderedDict as well def map_representer(dumper, data): return dumper.represent_dict(data.items()) -- GitLab From df176ad3a2b0ef304a6a777f42d94387ddc1e313 Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Tue, 29 Jan 2019 12:31:05 +0000 Subject: [PATCH 20/26] bisector: rename report_results into report --- tools/bisector/bisector/bisector.py | 51 +++++++++++++++-------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/tools/bisector/bisector/bisector.py b/tools/bisector/bisector/bisector.py index 88905304f..273f98386 100755 --- a/tools/bisector/bisector/bisector.py +++ b/tools/bisector/bisector/bisector.py @@ -1027,14 +1027,14 @@ class StepMeta(abc.ABCMeta, type(Serializable)): """ Metaclass of all steps. - Wraps :meth:`__init__` and :meth:`report_results` to preprocess the values + Wraps :meth:`__init__` and :meth:`report` to preprocess the values of the parameters annotated with a command line option parser. """ def __new__(meta_cls, name, bases, dct): # Wrap some methods to preprocess some parameters that have a parser # defined using the "options" class attribute. for method_name in ( - name for name in ('__init__', 'report_results') + name for name in ('__init__', 'report') if name in dct ): wrapper = meta_cls.wrap_method(dct, method_name) @@ -1121,7 +1121,7 @@ class StepABC(Serializable, metaclass=StepMeta): options = dict( __init__ = {}, - report_results = dict( + report = dict( verbose = BoolParam('Increase verbosity'), ), ) @@ -1144,7 +1144,7 @@ class StepABC(Serializable, metaclass=StepMeta): pass @abc.abstractmethod - def report_results(self, step_res_seq, + def report(self, step_res_seq, verbose=True ): """ @@ -1191,7 +1191,7 @@ class StepBase(StepABC): use_systemd_run = BoolParam('use systemd-run to run the command. This allows cleanup of daemons spawned by the command (using cgroups)'), env = EnvListParam('environment variables with a list of values that will be used for each iterations, wrapping around. The string format is: VAR1=val1%val2%...%%VAR2=val1%val2%.... In YAML, it is a map of var names to list of values. 
A single string can be supplied instead of a list of values.'), ), - report_results = dict( + report = dict( verbose = BoolParam('increase verbosity'), show_basic = BoolParam('show command exit status for all iterations'), ignore_non_issue = BoolParam('consider only iteration with non-zero command exit status'), @@ -1258,7 +1258,8 @@ class StepBase(StepABC): out(indent + textwrap.dedent(cls.__doc__).strip().replace('\n', '\n'+indent) + '\n') for pretty, method_name in ( - ('run', '__init__'), ('report', 'report_results') + ('run', '__init__'), + ('report', 'report'), ): parser_map = get_steps_kwarg_parsers(cls, method_name) if not parser_map: @@ -1455,7 +1456,7 @@ class StepBase(StepABC): ensure_dir(os.path.join(dirname, 'foo')) return dirname - def report_results(self, step_res_seq, service_hub, + def report(self, step_res_seq, service_hub, verbose = False, show_basic = True, ignore_non_issue = False, @@ -1788,10 +1789,10 @@ class ExekallLISATestStep(ShellStep): # Some options are not supported **filter_keys(StepBase.options['__init__'], remove={'trials'}), ), - report_results = dict( - verbose = StepBase.options['report_results']['verbose'], - show_basic = StepBase.options['report_results']['show_basic'], - iterations = StepBase.options['report_results']['iterations'], + report = dict( + verbose = StepBase.options['report']['verbose'], + show_basic = StepBase.options['report']['show_basic'], + iterations = StepBase.options['report']['iterations'], show_rates = BoolParam('show percentages of failure, error, skipped, undecided and passed tests'), show_dist = BoolParam('show graphical distribution of issues among iterations with a one letter code: passed=".", failed="F", error="#", skipped="s", undecided="u"'), show_pass_rate = BoolParam('always show the pass rate of tests, even when there are failures or crashes as well'), @@ -1944,7 +1945,7 @@ class ExekallLISATestStep(ShellStep): db = db, ) - def report_results(self, step_res_seq, service_hub, + def report(self, step_res_seq, service_hub, verbose = False, show_basic = False, show_rates = True, @@ -2184,7 +2185,7 @@ class ExekallLISATestStep(ShellStep): # Always execute that for the potential side effects like exporting the # logs. 
- basic_report = super().report_results( + basic_report = super().report( step_res_seq, service_hub, export_logs=export_logs, show_basic=show_basic, verbose=verbose, ignore_non_issue=ignore_non_issue, iterations=iterations, @@ -2361,10 +2362,10 @@ class LISATestStep(ShellStep): upload_results = BoolParam('upload the LISA results directory to Artifactorial as the execution goes, and delete the local archive.'), **StepBase.options['__init__'], ), - report_results = dict( - verbose = StepBase.options['report_results']['verbose'], - show_basic = StepBase.options['report_results']['show_basic'], - iterations = StepBase.options['report_results']['iterations'], + report = dict( + verbose = StepBase.options['report']['verbose'], + show_basic = StepBase.options['report']['show_basic'], + iterations = StepBase.options['report']['iterations'], show_rates = BoolParam('show percentages of failure, error, skipped and passed tests'), show_dist = BoolParam('show graphical distribution of issues among iterations with a one letter code: passed=".", failed="F", error="#", skipped="s"'), show_pass_rate = BoolParam('always show the pass rate of tests, even when there are failures or crashes as well'), @@ -2487,7 +2488,7 @@ class LISATestStep(ShellStep): ) - def report_results(self, step_res_seq, service_hub, + def report(self, step_res_seq, service_hub, verbose = False, show_basic = False, show_rates = True, @@ -2683,7 +2684,7 @@ class LISATestStep(ShellStep): # Always execute that for the potential side effects like exporting the # logs. - basic_report = super().report_results( + basic_report = super().report( step_res_seq, service_hub, export_logs=export_logs, show_basic=show_basic, verbose=verbose, ignore_non_issue=ignore_non_issue, iterations=iterations, @@ -3322,7 +3323,7 @@ class MacroStep(StepBase): bail_out_early = BoolParam('start a new iteration when a step returned bisect status bad or untestable and skip all remaining steps'), relative_root = BoolOrStrParam('root of relative paths in class names', allow_empty=True), ), - report_results = dict() + report = dict() ) """Groups a set of steps in a logical sequence.""" @@ -3592,7 +3593,7 @@ class MacroStep(StepBase): return report - def report_results(self, macrostep_res_seq, service_hub, steps_filter=None, + def report(self, macrostep_res_seq, service_hub, steps_filter=None, step_options=dict()): """Report the results of nested steps.""" @@ -3628,7 +3629,7 @@ class MacroStep(StepBase): continue # Select the kwargs applicable for this step - kwargs = get_step_kwargs(step.cat, step.name, step, 'report_results', step_options) + kwargs = get_step_kwargs(step.cat, step.name, step, 'report', step_options) # Pass down some MacroStep-specific parameters if isinstance(step, MacroStep): @@ -3637,7 +3638,7 @@ class MacroStep(StepBase): 'steps_filter': steps_filter, }) - report_str = step.report_results( + report_str = step.report( step_res_list, service_hub, **kwargs @@ -3646,7 +3647,7 @@ class MacroStep(StepBase): # Build a temporary iteration result to aggregate the results of # any given step, as if all these results were coming from a list # of steps. This allows quickly spotting if any of the iterations - # went wrong. This must be done after calling step.report_results, + # went wrong. This must be done after calling step.report, # so that it has a chance to modify the results. 
bisect_ret = StepSeqResult( step, @@ -4278,7 +4279,7 @@ class Report(Serializable): out = MLString() out('Description: {self.description}'.format(self=self)) out('Creation time: {self.creation_time}\n'.format(self=self)) - out(self.result.step.report_results( + out(self.result.step.report( [(IterationCounterStack(), self.result)], service_hub = service_hub, steps_filter = steps_filter, -- GitLab From 1648bf7bb1b56149b2724eeb92f5a90f4b676ac6 Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Wed, 30 Jan 2019 17:54:43 +0000 Subject: [PATCH 21/26] bisector: remove xunit2json-analyze command This has been superseded by `exekall compare`. --- tools/bisector/bisector/xunit2json.py | 250 -------------------------- tools/bisector/setup.py | 1 - 2 files changed, 251 deletions(-) delete mode 100755 tools/bisector/bisector/xunit2json.py diff --git a/tools/bisector/bisector/xunit2json.py b/tools/bisector/bisector/xunit2json.py deleted file mode 100755 index be0845832..000000000 --- a/tools/bisector/bisector/xunit2json.py +++ /dev/null @@ -1,250 +0,0 @@ -#! /usr/bin/env python3 - -import argparse -import collections -import functools -import itertools -import json -import operator -import sys - -import pandas as pd -import scipy.stats - -def error(msg): - print('Error: ' + str(msg), file=sys.stderr) - -def read_json(path): - try: - return pd.read_json(path, orient='index') - except ValueError as e: - raise FileNotFoundError("Unable to find or parse " + path) from e - -def correlate_df(events_df, threshold=0.0): - # Convert booleans to integers by multplying by 1 - events_df *= 1 - events_df = events_df.transpose().astype(float) - - # Compute the correlation between tests failures - corr_df = events_df.corr() - - # Rename the columns for better readability - corr_df.rename( - {name: i for i, name in enumerate(corr_df.index)}, - axis = 'columns', - inplace = True, - ) - corr_df.rename( - { - name: '({i}) {name}'.format(name=name, i=i) - for i, name in enumerate(corr_df.index) - }, - axis = 'rows', - inplace = True, - ) - - # Replace values below a given threshold by dots to improve readability - corr_df = corr_df.applymap((lambda x: x if abs(x) >= threshold else '.')) - return corr_df - -def compare_df(json_df1, json_df2, alpha, alternative='two-sided', non_significant=True, sort='name'): - # Use an inner-join to only consider tests that are present on both sides - merged_df = pd.merge(json_df1, json_df2, how='inner', right_index=True, - left_index=True, suffixes=('_old','_new')) - - if merged_df.empty: - raise ValueError("Merged dataframe is empty, there was no test in common") - - # Apply the Fisher exact test to all tests failures. 
- regression_map = collections.defaultdict(dict) - for row in merged_df.itertuples(index=True): - testcase = row[0] - failure_old = row.counters_old['failure'] - failure_new = row.counters_new['failure'] - passed_old = row.counters_old['passed'] - passed_new = row.counters_new['passed'] - - odds_ratio, p_val = scipy.stats.fisher_exact( - [ - # Ignore errors and skipped tests - [failure_old, passed_old], - [failure_new, passed_new], - ], - alternative = alternative - ) - - # Ignore errors and skipped tests - meaningful_total_old = failure_old + passed_old - meaningful_total_new = failure_new + passed_new - # If no meaningful iterations are available, return NaN - if not meaningful_total_old: - failure_old_pc = float('Inf') - else: - failure_old_pc = 100 * failure_old / (failure_old + passed_old) - if not meaningful_total_new: - failure_new_pc = float('Inf') - else: - failure_new_pc = 100 * failure_new / (failure_new + passed_new) - - delta = failure_new_pc - failure_old_pc - - regression_map[testcase] = { - 'failure_old': failure_old_pc, - 'failure_new': failure_new_pc, - 'delta': delta, - 'p-value': p_val, - # If the p-value is smaller than alpha, the regression or - # improvement is statistically significant - 'significant': (p_val <= alpha), - } - - regression_df = pd.DataFrame.from_dict(regression_map, orient='index') - if not non_significant: - regression_df = regression_df.loc[regression_df['significant']] - regression_df.drop('significant', axis=1, inplace=True) - - if sort == 'name': - regression_df.sort_index(inplace=True) - else: - regression_df.sort_values(by=[sort], ascending=False, inplace=True) - - return regression_df - -def _main(argv): - parser = argparse.ArgumentParser(description=""" - Compare tests failure rate using Fisher exact test. - """, - formatter_class=argparse.RawTextHelpFormatter) - - subparsers = parser.add_subparsers(title='subcommands', dest='subcommand') - - compare_parser = subparsers.add_parser('compare', - description="""Compare two JSON files.""" - ) - - analyze_parser = subparsers.add_parser('analyze', - description="""Analyze a given JSON file.""" - ) - - # Options for compare subcommand - compare_parser.add_argument('json', nargs=2, - help="""JSON files to compare.""") - - compare_parser.add_argument('--alpha', type=float, - default=5, - help="""Alpha risk for Fisher exact test in percents.""") - - compare_parser.add_argument('--non-significant', action='store_true', - help="""Also show non-significant changes of failure rate.""") - - compare_parser.add_argument('--sort', metavar='COL', - default='name', - help="""Sort the results by test name ("name") or by a column name.""") - - compare_parser.add_argument('--regression', action='store_true', - help="""Only look for regressions (one-sided Fisher exact test).""") - - # Options for analyze subcommand - analyze_parser.add_argument('json', nargs='+', - help="""JSON files to analyze.""") - - analyze_parser.add_argument('--corr', action='store_true', - help="""Show the correlation matrix between the tests' failures. High - correlation between tests that are always failing or passing are - usually not significant and should be filtered-out with --hide-const. 
- """) - - analyze_parser.add_argument('--failure-rate-csv', action='store_true', - help="""Show the failure rates in CSV.""") - - analyze_parser.add_argument('--corr-thresh', type=float, - default = 0.0, - help="""Hide correlation factors smaller than this threshold in absolute value.""") - - analyze_parser.add_argument('--hide-const', action='store_true', - help="""Remove columns and rows filled with NaN. This happens when the - variance of the failures is 0, which means correlation cannot be - defined.""") - - args = parser.parse_args(argv) - - # Do not crop any output - pd.set_option('display.width', None) - pd.set_option('display.max_columns', None) - pd.set_option('display.max_colwidth', -1) - pd.set_option('display.colheader_justify', 'center') - - if args.subcommand == 'compare': - json_df1 = read_json(args.json[0]) - json_df2 = read_json(args.json[1]) - - alpha = args.alpha / 100 - show_non_significant = args.non_significant - sort = args.sort - only_regression = args.regression - alternative = 'less' if only_regression else 'two-sided' - - try: - regression_df = compare_df( - json_df1, json_df2, - alpha, alternative, - show_non_significant, sort - ) - except KeyError as e: - error('Unknown column "{}"'.format(e)) - return 1 - - if not regression_df.empty: - print(regression_df.to_string( - formatters = { - 'failure_old':'{:5.2f}%'.format, - 'failure_new':'{:5.2f}%'.format, - 'delta':'{:6.2f}%'.format, - 'p-value':'{:4.2e}'.format, - } - )) - - if args.subcommand == 'analyze': - show_correlation = args.corr - correlation_thresh = args.corr_thresh - hide_const = args.hide_const - show_failure_rate = args.failure_rate_csv - - for json_path in args.json: - json_df = read_json(json_path) - - if show_correlation: - # Filter-out tests that never passed or never failed. They may - # have a 0 variance if they are also never skipped and are - # never erroring, which will result in NaN. Even if they don't - # have zero variance, they would be highly correlated which is - # usually not helpful. - if hide_const: - json_df = json_df[(json_df.failure != 0) & (json_df.passed != 0)] - - # Create a dataframe out of the failure events, one column per - # iteration. - failure_events_df = pd.DataFrame(json_df.events.apply(pd.Series).failure.values.tolist(), index=json_df.index) - # Fill the missing iteration using "false", since the event could not - # have happened on iteration where the test was not executed. 
- failure_events_df.fillna(value=False, inplace=True) - corr_df = correlate_df(failure_events_df, correlation_thresh) - print(corr_df.to_string(float_format=lambda x: "{:.2f}".format(x))) - - if show_failure_rate: - failure_df= json_df['failure']/(json_df['passed'] + json_df['failure']) - print(failure_df.to_csv(), end='') - -def main(argv=sys.argv[1:]): - try: - return_code = _main(argv=argv) - except FileNotFoundError as e: - error(e) - return_code = 1 - - sys.exit(return_code) - -if __name__ == '__main__': - main() - -# vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/tools/bisector/setup.py b/tools/bisector/setup.py index f6999e964..dbc2811f8 100755 --- a/tools/bisector/setup.py +++ b/tools/bisector/setup.py @@ -35,7 +35,6 @@ setup( entry_points={ 'console_scripts': [ 'bisector=bisector.bisector:main', - 'xunit2json-analyze=bisector.xunit2json:main' ], }, python_requires='>= 3.5', -- GitLab From 82b10448c0c45caf8962dfb831df414929b3e4cc Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Thu, 31 Jan 2019 18:56:04 +0000 Subject: [PATCH 22/26] bisector: Use "error" instead of "crashed" when exceptions are raised When unhandled exceptions are raised in tests, refer to them as errors instead of crashes, to avoid confusion with a kernel crash. --- tools/bisector/bisector/bisector.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/bisector/bisector/bisector.py b/tools/bisector/bisector/bisector.py index 273f98386..a6181f09a 100755 --- a/tools/bisector/bisector/bisector.py +++ b/tools/bisector/bisector/bisector.py @@ -1795,7 +1795,7 @@ class ExekallLISATestStep(ShellStep): iterations = StepBase.options['report']['iterations'], show_rates = BoolParam('show percentages of failure, error, skipped, undecided and passed tests'), show_dist = BoolParam('show graphical distribution of issues among iterations with a one letter code: passed=".", failed="F", error="#", skipped="s", undecided="u"'), - show_pass_rate = BoolParam('always show the pass rate of tests, even when there are failures or crashes as well'), + show_pass_rate = BoolParam('always show the pass rate of tests, even when there are failures or errors as well'), show_details = ChoiceOrBoolParam(['msg'], 'show details of results. Use "msg" for only a brief message'), show_artifact_dirs = BoolParam('show exekall artifact directory for all iterations'), testcase = CommaListParam('show only the test cases matching one of the patterns in the comma-separated list. * can be used to match any part of the name'), @@ -2194,7 +2194,7 @@ class ExekallLISATestStep(ShellStep): if show_basic: out(basic_report) - # Contains the percentage of skipped, failed, crashed and passed + # Contains the percentage of skipped, failed, error and passed # iterations for every testcase. 
testcase_stats = dict() table_out = MLString() @@ -2215,7 +2215,7 @@ class ExekallLISATestStep(ShellStep): ('skipped', 'skipped'), ('undecided', 'undecided'), ('failure', 'FAILED'), - ('error', 'CRASHED'), + ('error', 'ERROR'), ): filtered_entry_list = [entry for entry in entry_list if entry['result'] == issue] @@ -2312,7 +2312,7 @@ class ExekallLISATestStep(ShellStep): counts['passed'] = nr_tests - (sum(counts.values()) - nr_tests) out( - 'Crashed: {counts[error]}/{total}, ' + 'Error: {counts[error]}/{total}, ' 'Failed: {counts[failure]}/{total}, ' 'Undecided: {counts[undecided]}/{total}, ' 'Skipped: {counts[skipped]}/{total}, ' -- GitLab From 9ea0d139786bb4455760af1cf0989cd6decb5fcd Mon Sep 17 00:00:00 2001 From: Douglas RAILLARD Date: Mon, 4 Feb 2019 15:01:58 +0000 Subject: [PATCH 23/26] exekall: Add BY_UUID root folder in artifacts This folder contains a bunch of symlinks to expressions' artifact directory. There is one symlink per: * UUID of expression * UUID of computed values (also pointing to the artifact directory of the enclosing expression) This allows writing a simple `exekall run --replay` wrapper to find the right DB: #! /bin/bash UUID=$1 shift db="$(find "$EXEKALL_ARTIFACT_ROOT" -type l -name "$UUID" -exec readlink -f '{}' ';' -quit)/../../VALUE_DB.pickle.xz" exec exekall --debug run lisa/tests --load-db "$db" --replay "$UUID" "$@" --- tools/exekall/exekall/main.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tools/exekall/exekall/main.py b/tools/exekall/exekall/main.py index daa466438..4e653b26f 100755 --- a/tools/exekall/exekall/main.py +++ b/tools/exekall/exekall/main.py @@ -446,13 +446,26 @@ def do_merge(artifact_dirs, output_dir, use_hardlink=True, output_exist=False): os.makedirs(str(output_dir), exist_ok=output_exist) merged_db_path = output_dir/DB_FILENAME + (output_dir/'BY_UUID').mkdir(exist_ok=True) testsession_uuid_list = [] for artifact_dir in artifact_dirs: with (artifact_dir/'UUID').open(encoding='utf-8') as f: testsession_uuid = f.read().strip() testsession_uuid_list.append(testsession_uuid) + src_by_uuid = artifact_dir/'BY_UUID' + for uuid_symlink in src_by_uuid.iterdir(): + target = uuid_symlink.resolve() + target = pathlib.Path('..', target.relative_to(artifact_dir.resolve())) + (output_dir/'BY_UUID'/uuid_symlink.name).symlink_to(target) + + link_base_path = pathlib.Path('ORIGIN', testsession_uuid) + shutil.copytree( + str(src_by_uuid), + str(output_dir/link_base_path/'BY_UUID'), + symlinks=True, + ) # Copy all the files recursively for dirpath, dirnames, filenames in os.walk(str(artifact_dir)): @@ -786,6 +799,8 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, with (artifact_dir/'UUID').open('wt') as f: f.write(testsession_uuid+'\n') + (artifact_dir/'BY_UUID').mkdir() + out('\nArtifacts dir: {}\n'.format(artifact_dir)) # Get a list of ComputableExpression in order to execute them @@ -965,10 +980,10 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, ) def format_uuid(expr_val_list): - uuid_list = sorted( + uuid_list = sorted({ expr_val.uuid for expr_val in expr_val_list - ) + }) return '\n'.join(uuid_list) def write_uuid(path, *args): @@ -979,6 +994,16 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, write_uuid(expr_artifact_dir/'REUSED_VALUES_UUID', reused_expr_val_set) write_uuid(expr_artifact_dir/'COMPUTED_VALUES_UUID', computed_expr_val_set) + # From there, use a relative path for symlinks + expr_artifact_dir = pathlib.Path('..', 
expr_artifact_dir.relative_to(artifact_dir))
+    computed_uuid_set = {
+        expr_val.uuid
+        for expr_val in computed_expr_val_set
+    }
+    computed_uuid_set.add(expr.uuid)
+    for uuid_ in computed_uuid_set:
+        (artifact_dir/'BY_UUID'/uuid_).symlink_to(expr_artifact_dir)
+
     db = engine.ValueDB(
         engine.FrozenExprValSeq.from_expr_list(
             expr_list, hidden_callable_set=hidden_callable_set
--
GitLab


From 64cbcb4da625d7bf3e62babd2eab8d8c388f0e87 Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Mon, 4 Feb 2019 16:54:10 +0000
Subject: [PATCH 24/26] exekall: Fix --load-db

Fix improper ValueDB merging: ValueDB.merge() traverses db_list twice,
so passing it a generator (as do_merge() does) exhausted the input
after the first traversal and produced an empty merged DB. Also merge
the pre-existing DB when the output directory already exists.
---
 tools/exekall/exekall/engine.py | 5 +++--
 tools/exekall/exekall/main.py   | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py
index 08b3157f2..af8b2489b 100644
--- a/tools/exekall/exekall/engine.py
+++ b/tools/exekall/exekall/engine.py
@@ -112,6 +112,7 @@ class ValueDB:

     @classmethod
     def merge(cls, db_list):
+        db_list = list(db_list)
         adaptor_cls_set = {
             db.adaptor_cls
             for db in db_list
@@ -120,10 +121,10 @@ class ValueDB:
             raise ValueError('Cannot merge ValueDB with different adaptor classes: {}'.format(adaptor_cls_set))
         adaptor_cls = utils.take_first(adaptor_cls_set)

-        froz_val_seq_list = list(itertools.chain(*(
+        froz_val_seq_list = list(itertools.chain.from_iterable(
             db.froz_val_seq_list
             for db in db_list
-        )))
+        ))
         return cls(froz_val_seq_list, adaptor_cls=adaptor_cls)

     @classmethod
diff --git a/tools/exekall/exekall/main.py b/tools/exekall/exekall/main.py
index 4e653b26f..604aa8c8a 100755
--- a/tools/exekall/exekall/main.py
+++ b/tools/exekall/exekall/main.py
@@ -530,6 +530,9 @@ def do_merge(artifact_dirs, output_dir, use_hardlink=True, output_exist=False):
     with (output_dir/'UUID').open('wt') as f:
         f.write(combined_uuid+'\n')

+    if output_exist:
+        db_path_list.append(merged_db_path)
+
     merged_db = engine.ValueDB.merge(
         engine.ValueDB.from_path(path)
         for path in db_path_list
--
GitLab


From ece83f6009b2d8dcd27cb435c39c396166e6ac9c Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Tue, 5 Feb 2019 16:18:07 +0000
Subject: [PATCH 25/26] shell: Make sure setuptools is up to date

Make sure we use an up-to-date version of setuptools, so all
dependencies can be cleanly installed.
---
 shell/lisa_shell | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/shell/lisa_shell b/shell/lisa_shell
index 3323416eb..2920492a7 100755
--- a/shell/lisa_shell
+++ b/shell/lisa_shell
@@ -153,8 +153,8 @@ function _lisa-upgrade-pip {
     # install
     if [[ "$LISA_USE_VENV" == 1 ]]; then
         lisa-venv-activate || return 1
-        echo "Upgrading pip ..."
-        _lisa-python -m pip install --upgrade pip
+        echo "Upgrading pip and setuptools ..."
+        _lisa-python -m pip install --upgrade pip setuptools
     fi
 }
--
GitLab


From f99f38e1fdca9e58ef3e0b2fda6433f36fb0a4b7 Mon Sep 17 00:00:00 2001
From: Douglas RAILLARD
Date: Wed, 6 Feb 2019 10:16:45 +0000
Subject: [PATCH 26/26] install_base_ubuntu: Add python3-dev package

Depending on the availability of wheels, some packages may need the
Python development headers to compile extension modules. 
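As a quick sanity check (an illustrative sketch, not part of this
change), the presence of those headers can be probed from Python
itself:

    # Check whether the CPython headers needed to compile extension
    # modules from source are installed.
    import pathlib
    import sysconfig

    include_dir = pathlib.Path(sysconfig.get_paths()['include'])
    print('Python.h found:', (include_dir / 'Python.h').exists())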
--- install_base_ubuntu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install_base_ubuntu.sh b/install_base_ubuntu.sh index c0176c3bc..f136a9347 100755 --- a/install_base_ubuntu.sh +++ b/install_base_ubuntu.sh @@ -71,7 +71,7 @@ apt-get update # venv is not installed by default on Ubuntu, even though it is part of the # Python standard library apt-get -y install build-essential git wget expect kernelshark \ - python3 python3-pip python3-venv python3-setuptools python3-tk \ + python3 python3-dev python3-pip python3-venv python3-setuptools python3-tk \ gobject-introspection libcairo2-dev libgirepository1.0-dev gir1.2-gtk-3.0 install_nodejs -- GitLab
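
The __reduce_ex__ mechanism introduced in PATCH 16 above is what routes
every unpickling through _reload_serialized(), with no path available
at that point. A self-contained sketch of the idea (using a stand-in DB
class for illustration, not the real ValueDB):

    import pickle

    class DB:
        def __init__(self):
            self.reloaded = False

        @classmethod
        def _reload_serialized(cls, dct):
            # Rebuild the instance, then post-process it. This mirrors
            # ValueDB._reload_serialized(), which hands the reloaded DB
            # to the adaptor's reload_db(db, path=None).
            db = cls.__new__(cls)
            db.__dict__ = dct
            db.reloaded = True
            return db

        def __reduce_ex__(self, protocol):
            # Unpickling calls _reload_serialized(dct) to rebuild the
            # object instead of the default dict restore.
            return (self._reload_serialized, (self.__dict__,))

    db = pickle.loads(pickle.dumps(DB()))
    print(db.reloaded)    # True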