diff --git a/devmode_requirements.txt b/devmode_requirements.txt index 722b259d202d21f43c01ef26ca259eeb8735eb25..647fd5d8981392ed6ad584c55b4f3f32e634d506 100644 --- a/devmode_requirements.txt +++ b/devmode_requirements.txt @@ -1,6 +1,7 @@ # Must be ordered to satisfy the dependencies without pulling from PyPI. # Once they are found by pip in editable mode as specified here, they will be # used and not looked up on PyPI. +-e ./tools/exekall # devlib before WA and LISA -e ./external/devlib/ @@ -14,5 +15,4 @@ -e ./external/workload-automation/ -e ./[notebook,doc,test] --e ./tools/exekall -e ./tools/bisector[dbus] diff --git a/doc/doc_requirements.txt b/doc/doc_requirements.txt index c789b620878714df4acea0f73d7d2dbab240cc09..a93872b8ffdf2c95585f25a6fab63d9202c0f2a7 100644 --- a/doc/doc_requirements.txt +++ b/doc/doc_requirements.txt @@ -1,4 +1,5 @@ # A streamlined version of devmode_requirements.txt for doc building +-e ./tools/exekall # devlib before WA and LISA -e ./external/devlib/ @@ -12,5 +13,4 @@ -e ./external/workload-automation/ -e ./[doc] --e ./tools/exekall -e ./tools/bisector diff --git a/install_base_ubuntu.sh b/install_base_ubuntu.sh index c0176c3bcf7e94c321a545c5ebe697173440988e..f136a9347bbce5a06bee61919b6b29aec66d9374 100755 --- a/install_base_ubuntu.sh +++ b/install_base_ubuntu.sh @@ -71,7 +71,7 @@ apt-get update # venv is not installed by default on Ubuntu, even though it is part of the # Python standard library apt-get -y install build-essential git wget expect kernelshark \ - python3 python3-pip python3-venv python3-setuptools python3-tk \ + python3 python3-dev python3-pip python3-venv python3-setuptools python3-tk \ gobject-introspection libcairo2-dev libgirepository1.0-dev gir1.2-gtk-3.0 install_nodejs diff --git a/lisa/exekall_customize.py b/lisa/exekall_customize.py index 59cd1c014876b9b2cd2bc93d5622954cc646b658..99896ac3601684e8d472e1dc3402f3a425851f35 100644 --- a/lisa/exekall_customize.py +++ b/lisa/exekall_customize.py @@ -16,26 +16,24 @@ # limitations under the License. # +import argparse import contextlib import itertools -import inspect -import functools -import logging -import sys +import re import os.path from pathlib import Path -import xml.etree.ElementTree as ET -import traceback +from collections import OrderedDict, namedtuple from lisa.env import TestEnv, TargetConf from lisa.platforms.platinfo import PlatformInfo from lisa.utils import HideExekallID, Loggable, ArtifactPath, get_subclasses, groupby, Serializable from lisa.conf import MultiSrcConf -from lisa.tests.base import TestBundle, Result, ResultBundle, CannotCreateError +from lisa.tests.base import TestBundle, ResultBundle from lisa.tests.scheduler.load_tracking import FreqInvarianceItem +from lisa.regression import compute_regressions -from exekall.utils import info, get_name, get_mro, NoValue -from exekall.engine import ExprData, Consumer, PrebuiltOperator, ValueDB +from exekall.utils import get_name +from exekall.engine import ExprData, Consumer, PrebuiltOperator from exekall.customization import AdaptorBase class NonReusable: @@ -131,7 +129,7 @@ class LISAAdaptor(AdaptorBase): return hidden_op_set @staticmethod - def register_cli_param(parser): + def register_run_param(parser): parser.add_argument('--conf', action='append', default=[], help="LISA configuration file. If multiple configurations of a given type are found, they are merged (last one can override keys in previous ones)") @@ -141,17 +139,71 @@ class LISAAdaptor(AdaptorBase): default=[], help="Serialized object to inject when building expressions") + @staticmethod + def register_compare_param(parser): + parser.add_argument('--alpha', type=float, + default=5, + help="""Alpha risk for Fisher exact test in percents.""") + + parser.add_argument('--non-significant', action='store_true', + help="""Also show non-significant changes of failure rate.""") + + parser.add_argument('--remove-tag', action='append', + default=[], + help="""Remove the given tags in the testcase IDs before comparison.""") + + def compare_db_list(self, db_list): + alpha = self.args.alpha / 100 + show_non_significant = self.args.non_significant + + result_list_old, result_list_new = [ + db.get_roots() + for db in db_list + ] + + regr_list = compute_regressions( + result_list_old, + result_list_new, + remove_tags=self.args.remove_tag, + alpha=alpha, + ) + + print('testcase failure rate changes with alpha={}\n'.format(alpha)) + + id_len = max(len(regr.testcase_id) for regr in regr_list) + + header = '{id:<{id_len}} old% new% delta% pvalue{regr_column}'.format( + id='testcase'.format(alpha), + id_len=id_len, + regr_column=' changed' if show_non_significant else '' + ) + print(header + '\n' + '-' * len(header)) + for regr in regr_list: + if regr.significant or show_non_significant: + old_pc, new_pc = regr.failure_pc + print('{id:<{id_len}} {old_pc:>5.1f}% {new_pc:>5.1f}% {delta_pc:>5.1f}% {pval:.2e} {has_regr}'.format( + id=regr.testcase_id, + old_pc=old_pc, + new_pc=new_pc, + delta_pc=regr.failure_delta_pc, + pval=regr.p_val, + id_len=id_len, + has_regr='*' if regr.significant and show_non_significant else '', + )) + @staticmethod def get_default_type_goal_pattern_set(): return {'*.ResultBundle'} @classmethod - def load_db(cls, db_path, *args, **kwargs): - db = super().load_db(db_path, *args, **kwargs) + def reload_db(cls, db, path=None): + # If path is not known, we cannot do anything here + if not path: + return db # This will relocate ArtifactPath instances to the new absolute path of # the results folder, in case it has been moved to another place - artifact_dir = Path(db_path).parent.resolve() + artifact_dir = Path(path).parent.resolve() # Relocate ArtifactPath embeded in objects so they will always # contain an absolute path that adapts to the local filesystem @@ -242,130 +294,3 @@ class LISAAdaptor(AdaptorBase): tags = {k: v for k, v in tags.items() if v is not None} return tags - - def get_summary(self, result_map): - summary = super().get_summary(result_map) - - # The goal is to implement something that is roughly compatible with: - # https://github.com/jenkinsci/xunit-plugin/blob/master/src/main/resources/org/jenkinsci/plugins/xunit/types/model/xsd/junit-10.xsd - # This way, Jenkins should be able to read it, and other tools as well - - xunit_path = self.args.artifact_dir.joinpath('xunit.xml') - hidden_callable_set = { - op.callable_ - for op in self.hidden_op_set - } - et_root = self._create_xunit(result_map, hidden_callable_set) - et_tree = ET.ElementTree(et_root) - info('Writing xUnit file at: ' + str(xunit_path)) - et_tree.write(str(xunit_path)) - return summary - - def _create_xunit(self, result_map, hidden_callable_set): - et_testsuites = ET.Element('testsuites') - - testcase_list = list(result_map.keys()) - # We group by module in which the root operators are defined. There will - # be one testsuite for each such module. - def key(expr): - return expr.op.mod_name - - # One testsuite per module where a root operator is defined - for mod_name, group in groupby(testcase_list, key=key): - testcase_list = list(group) - et_testsuite = ET.SubElement(et_testsuites, 'testsuite', attrib=dict( - name = mod_name - )) - testsuite_counters = dict(failures=0, errors=0, tests=0, skipped=0) - - for testcase in testcase_list: - artifact_path = testcase.data.get('artifact_dir', None) - - # If there is more than one value for a given expression, we - # assume that they testcase will have unique names using tags - expr_val_list = result_map[testcase] - for expr_val in expr_val_list: - - # Get the set of UUIDs of all TestBundle instances that were - # involved in the testcase. - def bundle_predicate(expr_val): - return issubclass(expr_val.expr.op.value_type, TestBundle) - bundle_uuid_set = { - expr_val.uuid - for expr_val in expr_val.get_by_predicate(bundle_predicate) - } - bundle_uuid_set.discard(None) - - et_testcase = ET.SubElement(et_testsuite, 'testcase', dict( - name = expr_val.get_id( - full_qual=False, - qual=False, - with_tags=True, - hidden_callable_set=hidden_callable_set, - ), - # This may help locating the artifacts, even though it - # will only be valid on the machine it was produced on - artifact_path=str(artifact_path), - bundle_uuids=','.join(sorted(bundle_uuid_set)), - )) - testsuite_counters['tests'] += 1 - - for failed_expr_val in expr_val.get_excep(): - excep = failed_expr_val.excep - # When one critical object cannot be created, we assume - # the test was skipped. - if isinstance(excep, CannotCreateError): - result = 'skipped' - testsuite_counters['skipped'] += 1 - else: - result = 'error' - testsuite_counters['errors'] += 1 - - short_msg = str(excep) - msg = ''.join(traceback.format_exception(type(excep), excep, excep.__traceback__)) - type_ = type(excep) - - _append_result_tag(et_testcase, result, type_, short_msg, msg) - - value = expr_val.value - if isinstance(value, ResultBundle): - result = RESULT_TAG_MAP[value.result] - short_msg = value.result.lower_name - msg = str(value) - type_ = type(value) - - _append_result_tag(et_testcase, result, type_, short_msg, msg) - if value.result is Result.FAILED: - testsuite_counters['failures'] += 1 - - et_testsuite.attrib.update( - (k, str(v)) for k, v in testsuite_counters.items() - ) - - return et_testsuites - -# Expose it as a module-level name -load_db = LISAAdaptor.load_db - -def _append_result_tag(et_testcase, result, type_, short_msg, msg): - et_result = ET.SubElement(et_testcase, result, dict( - type=get_name(type_, full_qual=True), - type_bases=','.join( - get_name(type_, full_qual=True) - for type_ in get_mro(type_) - ), - message=str(short_msg), - )) - et_result.text = str(msg) - return et_result - -RESULT_TAG_MAP = { - # "passed" is an extension to xUnit format that we add for parsing - # convenience - Result.PASSED: 'passed', - Result.FAILED: 'failure', - # This tag is not part of xUnit format but necessary for our reporting - Result.UNDECIDED: 'undecided' -} -# Make sure we cover all cases -assert set(RESULT_TAG_MAP.keys()) == set(Result) diff --git a/lisa/regression.py b/lisa/regression.py new file mode 100644 index 0000000000000000000000000000000000000000..5df22f1bcc97e715426bb5930fb9d0ad48dc7fae --- /dev/null +++ b/lisa/regression.py @@ -0,0 +1,230 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (C) 2019, Arm Limited and contributors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import re +from collections import OrderedDict, namedtuple + +from lisa.utils import groupby, memoized +from lisa.tests.base import Result, ResultBundle + +import scipy.stats + +ResultCount = namedtuple('ResultCount', ('passed', 'failed')) + +class RegressionResult: + """ + Compute failure-rate regression between old and new series. + + The regression is checked using Fisher's exact test. + + :param testcase_id: ID of the testcase, used for pretty-printing + :type testcase_id: str + + :param old_count: number of times the test passed and failed in the old series + :type old_count: ResultCount + + :param new_count: number of times the test passed and failed in the new series + :type new_count: ResultCount + + :param alpha: Alpha risk when carrying the statistical test + :type alpha: float + + + """ + def __init__(self, testcase_id, + old_count, new_count, + alpha=None, + ): + self.old_count = old_count + self.new_count = new_count + + self.testcase_id = testcase_id + self.alpha = alpha if alpha is not None else 0.05 + + @classmethod + def from_result_list(cls, testcase_id, old_list, new_list, alpha=None): + """ + Build a :class:`RegressionResult` from two list of + :class:`lisa.tests.kernel.test_bundle.Result`, or objects that can be + converted to `bool`. + + .. note:: Only ``FAILED`` and ``PASSED`` results are taken into account, + other results are ignored. + + :param testcase_id: ID of the testcase + :type testcase_id: str + + :param old_list: old series + :type old_list: list(lisa.tests.kernel.test_bundle.Result) + + :param new_list: new series + :type new_list: list(lisa.tests.kernel.test_bundle.Result) + + :param alpha: Alpha risk of the statistical test + :type alpha: float + """ + def coerce_to_bool(x, res): + if isinstance(x, ResultBundle): + return x.result is res + # handle other types as well, as long as they can be + # converted to bool + else: + if res is Result.FAILED: + return not bool(x) + elif res is Result.PASSED: + return bool(x) + + def count(seq, res): + return sum( + coerce_to_bool(x, res) + for x in seq + ) + + # Ignore errors and skipped tests + old_count = ResultCount( + failed=count(old_list, Result.FAILED), + passed=count(old_list, Result.PASSED), + ) + + new_count = ResultCount( + failed=count(new_list, Result.FAILED), + passed=count(new_list, Result.PASSED), + ) + + return cls( + testcase_id=testcase_id, + old_count=old_count, + new_count=new_count, + alpha=alpha, + ) + + @property + def sample_size(self): + """ + Tuple of sample sizes for old and new series. + + """ + return ( + (self.old_count.passed + self.old_count.failed), + (self.new_count.passed + self.new_count.failed), + ) + + @property + def failure_pc(self): + """ + Tuple of failure rate in percent for old an new series. + """ + def div(x, y): + try: + return x/y + except ZeroDivisionError: + return float('Inf') + failure_new_pc = 100 * div(self.new_count.failed, (self.new_count.failed + self.new_count.passed)) + _pc = 100 * div(self.old_count.failed, (self.old_count.failed + self.old_count.passed)) + + return (_pc, failure_new_pc) + + @property + def failure_delta_pc(self): + """ + Delta between old and new failure rate in percent. + """ + _pc, failure_new_pc = self.failure_pc + return failure_new_pc - _pc + + @property + def significant(self): + """ + True if there is a significant difference in failure rate, False + otherwise. + """ + return self.p_val <= self.alpha + + @property + def p_val(self): + """ + P-value of the statistical test. + """ + return self.get_p_val() + + @memoized + def get_p_val(self, alternative='two-sided'): + """ + Compute the p-value of the statistical test, with the given alternative + hypothesis. + """ + # Apply the Fisher exact test to all tests failures. + odds_ratio, p_val = scipy.stats.fisher_exact( + [ + # Ignore errors and skipped tests + [self.old_count.failed, self.old_count.passed], + [self.new_count.failed, self.new_count.passed], + ], + alternative = alternative, + ) + return p_val + +def compute_regressions(old_list, new_list, remove_tags=[], **kwargs): + """ + Compute a list of :class:`RegressionResult` out of two lists of + :class:`exekall.engine.FrozenVal`. + + The tests are first grouped by their ID, and then a + :class:`RegressionResult` is computed for each of these ID. + + :param old_list: old series of :class:`exekall.engine.FrozenVal` + :type old_list: list(exekall.engine.FrozenVal) + + :param new_list: new series of :class:`exekall.engine.FrozenVal` + :type new_list: list(exekall.engine.FrozenVal) + + :param remove_tags: remove the given list of tags from the IDs before + computing the regression. That allows computing regressions with a + different "board" tag for example. + :type remove_tags: list(str) + + :param kwargs: extra :meth:`RegressionResult.from_result_list` parameters + """ + def get_id(froz_val): + id_ = froz_val.get_id(qual=False, with_tags=True) + + # Remove tags, so that more test will share the same ID. This allows + # cross-board comparison for example. + for tag in remove_tags: + id_ = re.sub(r'\[{}=.*?\]'.format(tag), '', id_) + + return id_ + + def group_by_testcase(froz_val_list): + return OrderedDict( + (testcase_id, [froz_val.value for froz_val in froz_val_group]) + for testcase_id, froz_val_group in groupby(froz_val_list, key=get_id) + ) + + old_testcases = group_by_testcase(old_list) + new_testcases = group_by_testcase(new_list) + + return [ + RegressionResult.from_result_list( + testcase_id=testcase_id, + old_list=old_testcases[testcase_id], + new_list=new_testcases[testcase_id], + **kwargs, + ) + for testcase_id in sorted(old_testcases.keys() & new_testcases.keys()) + ] diff --git a/lisa/utils.py b/lisa/utils.py index 7af99785b2d5615faa9810de21796a030a67c2eb..426f9fac26aae380fdd50ca88ae630545a04e100 100644 --- a/lisa/utils.py +++ b/lisa/utils.py @@ -46,9 +46,6 @@ LISA_HOME = os.getenv('LISA_HOME') The detected location of your LISA installation """ -if not LISA_HOME: - logging.getLogger(__name__).warning('LISA_HOME env var is not set, LISA may misbehave.') - class Loggable: """ A simple class for uniformly named loggers diff --git a/requirements.txt b/requirements.txt index aa56fac3cfa8c83da36da6b6a26add509f777fdd..ee4334a484e8a3bbf10f2ccf59d5b5faed546dcd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -./[notebook,doc,test] ./tools/exekall +./[notebook,doc,test] ./tools/bisector[dbus] diff --git a/setup.py b/setup.py index f24ca6067cae4d02534e08aa2de0ed21270bd7cf..23f87da30f2555e3182fce7d98ca3453ddb185a9 100755 --- a/setup.py +++ b/setup.py @@ -92,6 +92,7 @@ setup( "matplotlib >= 1.4.2", "pandas >= 0.23.0", "numpy", + "scipy", "ruamel.yaml >= 0.15.81", # Depdendencies that are shipped as part of the LISA repo as diff --git a/shell/lisa_shell b/shell/lisa_shell index 3323416eb7c65b8cbea5e7dc2fac8bf72064310e..2920492a78e82b9e1a7f49f07ad52bfb0d75df63 100755 --- a/shell/lisa_shell +++ b/shell/lisa_shell @@ -153,8 +153,8 @@ function _lisa-upgrade-pip { # install if [[ "$LISA_USE_VENV" == 1 ]]; then lisa-venv-activate || return 1 - echo "Upgrading pip ..." - _lisa-python -m pip install --upgrade pip + echo "Upgrading pip and setuptools ..." + _lisa-python -m pip install --upgrade pip setuptools fi } diff --git a/tools/bisector/bisector/bisector.py b/tools/bisector/bisector/bisector.py index 69908e3a67d5f011c2aa3bea7f7e6c248fdbd577..a6181f09a9792fd89355394e35f37245344d2dce 100755 --- a/tools/bisector/bisector/bisector.py +++ b/tools/bisector/bisector/bisector.py @@ -26,6 +26,7 @@ import enum import fcntl import fnmatch import functools +import gc import glob import gzip import hashlib @@ -34,6 +35,7 @@ import inspect import itertools import json import logging +import lzma import math import multiprocessing import mimetypes @@ -41,6 +43,7 @@ import numbers import os import os.path import pickle +import pathlib import queue import re import select @@ -1024,14 +1027,14 @@ class StepMeta(abc.ABCMeta, type(Serializable)): """ Metaclass of all steps. - Wraps :meth:`__init__` and :meth:`report_results` to preprocess the values + Wraps :meth:`__init__` and :meth:`report` to preprocess the values of the parameters annotated with a command line option parser. """ def __new__(meta_cls, name, bases, dct): # Wrap some methods to preprocess some parameters that have a parser # defined using the "options" class attribute. for method_name in ( - name for name in ('__init__', 'report_results') + name for name in ('__init__', 'report') if name in dct ): wrapper = meta_cls.wrap_method(dct, method_name) @@ -1118,7 +1121,7 @@ class StepABC(Serializable, metaclass=StepMeta): options = dict( __init__ = {}, - report_results = dict( + report = dict( verbose = BoolParam('Increase verbosity'), ), ) @@ -1141,7 +1144,7 @@ class StepABC(Serializable, metaclass=StepMeta): pass @abc.abstractmethod - def report_results(self, step_res_seq, + def report(self, step_res_seq, verbose=True ): """ @@ -1188,7 +1191,7 @@ class StepBase(StepABC): use_systemd_run = BoolParam('use systemd-run to run the command. This allows cleanup of daemons spawned by the command (using cgroups)'), env = EnvListParam('environment variables with a list of values that will be used for each iterations, wrapping around. The string format is: VAR1=val1%val2%...%%VAR2=val1%val2%.... In YAML, it is a map of var names to list of values. A single string can be supplied instead of a list of values.'), ), - report_results = dict( + report = dict( verbose = BoolParam('increase verbosity'), show_basic = BoolParam('show command exit status for all iterations'), ignore_non_issue = BoolParam('consider only iteration with non-zero command exit status'), @@ -1255,7 +1258,8 @@ class StepBase(StepABC): out(indent + textwrap.dedent(cls.__doc__).strip().replace('\n', '\n'+indent) + '\n') for pretty, method_name in ( - ('run', '__init__'), ('report', 'report_results') + ('run', '__init__'), + ('report', 'report'), ): parser_map = get_steps_kwarg_parsers(cls, method_name) if not parser_map: @@ -1452,7 +1456,7 @@ class StepBase(StepABC): ensure_dir(os.path.join(dirname, 'foo')) return dirname - def report_results(self, step_res_seq, service_hub, + def report(self, step_res_seq, service_hub, verbose = False, show_basic = True, ignore_non_issue = False, @@ -1713,10 +1717,12 @@ class LISATestStepResult(StepResult): name = 'LISA-test', ) - def __init__(self, xunit, results_path, *args, **kwargs): - super().__init__(*args, **kwargs) + # TODO: get rid of xunit and only use db + def __init__(self, results_path, db=None, xunit=None, **kwargs): + super().__init__(**kwargs) self.xunit = xunit self.results_path = results_path + self.db = db def is_excep_ignored(excep, ignored_except_set): # If the type is missing, assume a generic Exception @@ -1759,7 +1765,7 @@ def deprecated_parse_testcase_res(testcase, kind, ignored_except_set): class ExekallLISATestStep(ShellStep): """ - Execute an exekall LISA test command and collect xUnit results. Also + Execute an exekall LISA test command and collect exekall's ValueDB. Also compress the result directory and record its path. It will also define some environment variables that are expected to be used by the command to be able to locate resources to collect. @@ -1772,31 +1778,33 @@ class ExekallLISATestStep(ShellStep): use_systemd_run = False, compress_artifact = True, upload_artifact = False, + prune_db = True, ) options = dict( __init__ = dict( compress_artifact = BoolParam('compress the exekall artifact directory in an archive'), upload_artifact = BoolParam('upload the exekall artifact directory to Artifactorial as the execution goes, and delete the local archive.'), + prune_db = BoolParam("Prune exekall's ValueDB so that only roots values are preserved. That allows smaller reports that are faster to load"), # Some options are not supported **filter_keys(StepBase.options['__init__'], remove={'trials'}), ), - report_results = dict( - verbose = StepBase.options['report_results']['verbose'], - show_basic = StepBase.options['report_results']['show_basic'], - iterations = StepBase.options['report_results']['iterations'], + report = dict( + verbose = StepBase.options['report']['verbose'], + show_basic = StepBase.options['report']['show_basic'], + iterations = StepBase.options['report']['iterations'], show_rates = BoolParam('show percentages of failure, error, skipped, undecided and passed tests'), show_dist = BoolParam('show graphical distribution of issues among iterations with a one letter code: passed=".", failed="F", error="#", skipped="s", undecided="u"'), - show_pass_rate = BoolParam('always show the pass rate of tests, even when there are failures or crashes as well'), + show_pass_rate = BoolParam('always show the pass rate of tests, even when there are failures or errors as well'), show_details = ChoiceOrBoolParam(['msg'], 'show details of results. Use "msg" for only a brief message'), show_artifact_dirs = BoolParam('show exekall artifact directory for all iterations'), - testcase = CommaListParam('show only the test cases matching one of the patterns in the comma-separated list. * can be used to match any part of the name.'), + testcase = CommaListParam('show only the test cases matching one of the patterns in the comma-separated list. * can be used to match any part of the name'), ignore_testcase = CommaListParam('completely ignore test cases matching one of the patterns in the comma-separated list. * can be used to match any part of the name.'), ignore_non_issue = BoolParam('consider only tests that failed'), ignore_excep = CommaListParam('ignore the given comma-separated list of exceptions that caused tests failure or error. * can be used to match any part of the name'), dump_artifact_dirs = BoolOrStrParam('write the list of exekall artifact directories to a file. Useful to implement garbage collection of unreferenced artifact archives'), - xunit2json = BoolOrStrParam('append consolidated xUnit information to a JSON file'), - export_logs = BoolOrStrParam('export the logs, xUnit file and artifact directory symlink to the given directory'), + export_db = BoolOrStrParam('export a merged exekall ValueDB, merging it with existing ValueDB if the file exists', allow_empty=False), + export_logs = BoolOrStrParam('export the logs and artifact directory symlink to the given directory'), download = BoolParam('Download the exekall artifact archives if necessary'), upload_artifact = BoolParam('upload the artifact directory to Artifactorial and update the in-memory report. Following env var are needed: ARTIFACTORIAL_FOLDER set to the folder URL and ARTIFACTORIAL_TOKEN. Note: --export should be used to save the report with updated paths'), ) @@ -1806,6 +1814,7 @@ class ExekallLISATestStep(ShellStep): def __init__(self, compress_artifact = Default, upload_artifact = Default, + prune_db = Default, **kwargs ): kwargs['trials'] = 1 @@ -1818,8 +1827,12 @@ class ExekallLISATestStep(ShellStep): compress_artifact = True self.compress_artifact = compress_artifact + self.prune_db = prune_db def run(self, i_stack, service_hub): + from exekall.utils import NoValue + from exekall.engine import ValueDB + artifact_path = os.getenv( 'EXEKALL_ARTIFACT_ROOT', # default value @@ -1827,7 +1840,9 @@ class ExekallLISATestStep(ShellStep): ) # Create a unique artifact dir - artifact_path = os.path.join(artifact_path, uuid.uuid4().hex) + date = datetime.datetime.now().strftime('%Y%m%d_%H:%M:%S') + name = '{}_{}'.format(date, uuid.uuid4().hex) + artifact_path = os.path.join(artifact_path, name) # This also strips the trailing /, which is needed later on when # archiving the artifact. @@ -1847,29 +1862,31 @@ class ExekallLISATestStep(ShellStep): else: bisect_ret = BisectRet.GOOD - # First item is the oldest created file - xunit_path_list = sorted( - glob.glob(os.path.join(artifact_path, '**/xunit.xml'), recursive=True), - key=lambda x: os.path.getmtime(x) - ) - xunit_report = '' - if xunit_path_list: - # Take the most recent file - xunit_path = xunit_path_list[-1] - if len(xunit_path_list) > 1: - warn('Found more than one xUnit file, only taking the most recent one ({}): {}'.format( - xunit_path, - ', '.join(xunit_path_list) - )) - - try: - with open(xunit_path, 'r') as xunit_file: - xunit_report = xunit_file.read() - except OSError as e: - warn('Could not open xUnit XML {}: {}'.format(xunit_path, e)) - + db_path = os.path.join(artifact_path, 'VALUE_DB.pickle.xz') + try: + db = ValueDB.from_path(db_path) + except Exception as e: + warn('Could not read DB at {}: {}'.format(db_path, e)) + db = None else: - warn('No xUnit file found under: {}'.format(artifact_path)) + if self.prune_db: + # Prune the DB so we only keep the root values in it, or the + # exceptions + root_froz_val_uuids = { + froz_val.uuid + for froz_val in db.get_roots() + } + def prune_predicate(froz_val): + return not ( + # Keep the root values, usually ResultBundle's. We + # cannot compare the object themselves since they have + # been copied to the pruned DB. + froz_val.uuid in root_froz_val_uuids + or + # keep errors and values leading to them + froz_val.value is NoValue + ) + db = db.prune_by_predicate(prune_predicate) # Compress artifact directory if self.compress_artifact: @@ -1918,17 +1935,17 @@ class ExekallLISATestStep(ShellStep): path = artifact_local_path, )) else: - error('No upload service available, could not upload exkeall artifact.') + error('No upload service available, could not upload exekall artifact.') return LISATestStepResult( step = self, res_list = res_list, bisect_ret = bisect_ret, - xunit = xunit_report, results_path = artifact_path, + db = db, ) - def report_results(self, step_res_seq, service_hub, + def report(self, step_res_seq, service_hub, verbose = False, show_basic = False, show_rates = True, @@ -1942,7 +1959,7 @@ class ExekallLISATestStep(ShellStep): ignore_non_issue = False, ignore_excep = [], dump_artifact_dirs = False, - xunit2json = False, + export_db = False, export_logs = False, download = True, upload_artifact = False @@ -1951,6 +1968,11 @@ class ExekallLISATestStep(ShellStep): the run() method. """ + from exekall.utils import get_name, NoValue + from exekall.engine import ValueDB + + from lisa.tests.base import CannotCreateError, Result + if verbose: show_basic = True show_rates = True @@ -1969,37 +1991,35 @@ class ExekallLISATestStep(ShellStep): ignored_testcase_set = set(ignore_testcase) considered_iteration_set = set(iterations) - # Parse the xUnit XML report from LISA to know the failing tests + # Read the ValueDB from exekall to know the failing tests testcase_map = collections.defaultdict(list) filtered_step_res_seq = list() + db_list = [] for step_res_item in step_res_seq: i_stack, step_res = step_res_item + bisect_ret = None # Ignore the iterations we are not interested in if considered_iteration_set and i_stack[0] not in considered_iteration_set: continue - # The xunit attribute of LISATestStepResult results contains the - # xUnit file content generated by LISA - xunit = step_res.xunit.strip() - if not xunit: - warn("Empty LISA's xUnit for {step_name} step, iteration {i}".format( + db = step_res.db + if not db: + warn("No exekall ValueDB for {step_name} step, iteration {i}".format( step_name = step_res.step.name, i = i_stack )) continue + else: + db_list.append(db) + def key(froz_val): + return froz_val.get_id(full_qual=True, with_tags=True) - try: - et_testsuites = ET.fromstring(step_res.xunit.strip()) - except ET.ParseError as e: - warn("Could not open LISA's xUnit report: {e}".format(e=e)) - continue + # Gather all result bundles + froz_val_list = sorted(db.get_roots(), key=key) - bisect_ret = None - for et_testsuite in et_testsuites.findall('testsuite'): - testsuite_name = et_testsuite.get('name') - for et_testcase in et_testsuite.findall('testcase'): - testcase_id = et_testcase.get('name') + for froz_val in froz_val_list: + testcase_id = froz_val.get_id(qual=False, with_tags=True) # Ignore tests we are not interested in if ( @@ -2016,31 +2036,65 @@ class ExekallLISATestStep(ShellStep): continue entry = { - 'testsuite': testsuite_name, 'testcase_id': testcase_id, 'i_stack': i_stack, 'results_path': step_res.results_path, } - is_ignored = True - # It is assumed that there will only be one result subtag - for et_subtag in et_testcase: - tag = et_subtag.tag - if tag in ( - 'passed', 'failure', 'skipped', 'undecided', 'error' - ): - entry['result'] = tag - - type_name = et_subtag.get('type') - if tag == 'error': - is_ignored = is_excep_ignored(type_name, ignored_except_set) - else: - is_ignored = False + is_ignored = False - short_msg = et_subtag.get('message') - msg = et_subtag.text - entry['details'] = (type_name, short_msg, msg) - break + try: + # We only show the 1st exception, others are hidden + excep_froz_val = list(froz_val.get_excep())[0] + except IndexError: + excep_froz_val = None + + if excep_froz_val: + excep = excep_froz_val.excep + if isinstance(excep, CannotCreateError): + result = 'skipped' + else: + result = 'error' + entry['result'] = result + + type_name = get_name(type(excep)) + is_ignored |= is_excep_ignored( + type_name, + ignored_except_set, + ) + short_msg = str(excep) + msg = excep_froz_val.excep_tb + + entry['details'] = (type_name, short_msg, msg) + else: + val = froz_val.value + result_map = { + Result.PASSED: 'passed', + Result.FAILED: 'failure', + Result.UNDECIDED: 'undecided', + } + # If that is a ResultBundle, use its result to get + # most accurate info, otherwise just assume it is a + # bool-ish value + try: + result = val.result + msg = '\n'.join( + '{}: {}'.format(metric, value) + for metric, value in val.metrics.items() + ) + except AttributeError: + result = Result.PASSED if val else Result.FAILED + msg = str(val) + + # If the result is not something known, assume undecided + result = result_map.get(result, 'undecided') + + entry['result'] = result + type_name = get_name(type(val)) + short_msg = result + entry['details'] = (type_name, short_msg, msg) + + entry['froz_val'] = froz_val # Ignored testcases will not contribute to the number of # iterations @@ -2058,7 +2112,7 @@ class ExekallLISATestStep(ShellStep): if not any_entries: bisect_ret = BisectRet.NA - # Update the LISATestStepResult bisect result based on the xUnit + # Update the LISATestStepResult bisect result based on the DB # content. step_res.bisect_ret = bisect_ret @@ -2127,17 +2181,11 @@ class ExekallLISATestStep(ShellStep): os.unlink(archive_dst) os.symlink(archive_path, archive_dst) - # Save the xUnit file as well - if step_res.xunit: - xunit_log_path = os.path.join(log_dir, 'xunit.xml') - with open(xunit_log_path, 'w') as f: - f.write(step_res.xunit) - out('') # Always execute that for the potential side effects like exporting the # logs. - basic_report = super().report_results( + basic_report = super().report( step_res_seq, service_hub, export_logs=export_logs, show_basic=show_basic, verbose=verbose, ignore_non_issue=ignore_non_issue, iterations=iterations, @@ -2146,7 +2194,7 @@ class ExekallLISATestStep(ShellStep): if show_basic: out(basic_report) - # Contains the percentage of skipped, failed, crashed and passed + # Contains the percentage of skipped, failed, error and passed # iterations for every testcase. testcase_stats = dict() table_out = MLString() @@ -2167,7 +2215,7 @@ class ExekallLISATestStep(ShellStep): ('skipped', 'skipped'), ('undecided', 'undecided'), ('failure', 'FAILED'), - ('error', 'CRASHED'), + ('error', 'ERROR'), ): filtered_entry_list = [entry for entry in entry_list if entry['result'] == issue] @@ -2204,6 +2252,7 @@ class ExekallLISATestStep(ShellStep): i_stack = entry['i_stack'] results_path = '\n' + entry['results_path'] if show_artifact_dirs else '' exception_name, short_msg, msg = entry['details'] + uuid_ = entry['froz_val'].uuid if show_details == 'msg': msg = '' @@ -2216,7 +2265,9 @@ class ExekallLISATestStep(ShellStep): msg += ' ' table_out( - ' #{i_stack: <2}) {short_msg} ({exception_name}){results_path}{msg}'.format(**locals()).replace('\n', '\n\t') + ' #{i_stack: <2}) UUID={uuid_} ({exception_name}) {short_msg}{results_path}{msg}'.format( + **locals() + ).replace('\n', '\n\t') ) iterations_summary = stats['iterations_summary'] @@ -2245,7 +2296,6 @@ class ExekallLISATestStep(ShellStep): out() out(dist_out) - counts = { issue: sum( # If there is a non-zero count for that issue for that test, we @@ -2253,32 +2303,36 @@ class ExekallLISATestStep(ShellStep): bool(stats['counters'][issue]) for stats in testcase_stats.values() ) - for issue in ('error', 'failure', 'undecided', 'skipped') + for issue in ('error', 'failure', 'undecided', 'skipped', 'total') } + nr_tests = len(testcase_map) + assert counts['total'] == nr_tests + # Only account for tests that only passed and had no other issues - counts['passed'] = sum( - all( - not count - for issue, count in stats['counters'].items() - if issue != 'passed' - ) - for stats in testcase_stats.values() - ) + counts['passed'] = nr_tests - (sum(counts.values()) - nr_tests) out( - 'Crashed: {counts[error]}/{total}, ' + 'Error: {counts[error]}/{total}, ' 'Failed: {counts[failure]}/{total}, ' 'Undecided: {counts[undecided]}/{total}, ' 'Skipped: {counts[skipped]}/{total}, ' 'Passed: {counts[passed]}/{total}'.format( counts=counts, - total=len(testcase_map), + total=nr_tests, ) ) - # Write out the digest in JSON format so another tool can exploit it - if xunit2json: - update_json(xunit2json, testcase_stats) + # Write-out a merged DB + if export_db: + try: + existing_db = ValueDB.from_path(export_db) + except FileNotFoundError: + pass + else: + db_list.append(existing_db) + + merged_db = ValueDB.merge(db_list) + merged_db.to_path(export_db) return out @@ -2308,10 +2362,10 @@ class LISATestStep(ShellStep): upload_results = BoolParam('upload the LISA results directory to Artifactorial as the execution goes, and delete the local archive.'), **StepBase.options['__init__'], ), - report_results = dict( - verbose = StepBase.options['report_results']['verbose'], - show_basic = StepBase.options['report_results']['show_basic'], - iterations = StepBase.options['report_results']['iterations'], + report = dict( + verbose = StepBase.options['report']['verbose'], + show_basic = StepBase.options['report']['show_basic'], + iterations = StepBase.options['report']['iterations'], show_rates = BoolParam('show percentages of failure, error, skipped and passed tests'), show_dist = BoolParam('show graphical distribution of issues among iterations with a one letter code: passed=".", failed="F", error="#", skipped="s"'), show_pass_rate = BoolParam('always show the pass rate of tests, even when there are failures or crashes as well'), @@ -2356,7 +2410,6 @@ class LISATestStep(ShellStep): env = { 'LISA_RESULTS_BASE': results_path, - # TODO: remove that once LISA has switched to the new behavior 'LISA_RESULTS_DIR': results_path } info('Setting LISA_RESULTS_BASE = {results_path} ...'.format(**locals())) @@ -2430,12 +2483,12 @@ class LISATestStep(ShellStep): step = self, res_list = res_list, bisect_ret = bisect_ret, - xunit = xunit_report, results_path = results_path, + xunit = xunit_report, ) - def report_results(self, step_res_seq, service_hub, + def report(self, step_res_seq, service_hub, verbose = False, show_basic = False, show_rates = True, @@ -2631,7 +2684,7 @@ class LISATestStep(ShellStep): # Always execute that for the potential side effects like exporting the # logs. - basic_report = super().report_results( + basic_report = super().report( step_res_seq, service_hub, export_logs=export_logs, show_basic=show_basic, verbose=verbose, ignore_non_issue=ignore_non_issue, iterations=iterations, @@ -3270,7 +3323,7 @@ class MacroStep(StepBase): bail_out_early = BoolParam('start a new iteration when a step returned bisect status bad or untestable and skip all remaining steps'), relative_root = BoolOrStrParam('root of relative paths in class names', allow_empty=True), ), - report_results = dict() + report = dict() ) """Groups a set of steps in a logical sequence.""" @@ -3540,7 +3593,7 @@ class MacroStep(StepBase): return report - def report_results(self, macrostep_res_seq, service_hub, steps_filter=None, + def report(self, macrostep_res_seq, service_hub, steps_filter=None, step_options=dict()): """Report the results of nested steps.""" @@ -3576,7 +3629,7 @@ class MacroStep(StepBase): continue # Select the kwargs applicable for this step - kwargs = get_step_kwargs(step.cat, step.name, step, 'report_results', step_options) + kwargs = get_step_kwargs(step.cat, step.name, step, 'report', step_options) # Pass down some MacroStep-specific parameters if isinstance(step, MacroStep): @@ -3585,7 +3638,7 @@ class MacroStep(StepBase): 'steps_filter': steps_filter, }) - report_str = step.report_results( + report_str = step.report( step_res_list, service_hub, **kwargs @@ -3594,7 +3647,7 @@ class MacroStep(StepBase): # Build a temporary iteration result to aggregate the results of # any given step, as if all these results were coming from a list # of steps. This allows quickly spotting if any of the iterations - # went wrong. This must be done after calling step.report_results, + # went wrong. This must be done after calling step.report, # so that it has a chance to modify the results. bisect_ret = StepSeqResult( step, @@ -3870,31 +3923,16 @@ def do_run(slave_manager, iteration_n, stat_test, steps_filter=None, def init_yaml(yaml, relative_root): """ Initialize pyyaml to allow transparent loading and dumping of - OrderedDict, as well as an !include tag to include another file. + OrderedDict. """ - # Without that, escape sequences are used to represent unicode characters in - # plain ascii. + # Without allow_unicode, escape sequences are used to represent unicode + # characters in plain ASCII yaml.allow_unicode = True - yaml.default_flow_style = False - yaml.indent = 4 - - # Allow !include tag to include other files. Relative paths are relative - # to the folder of the root yaml file. Absolute paths are used as-is. - # Environments variables are expanded in the path. - def include_constructor(loader, node): - filename = loader.construct_scalar(node) - filename = os.path.expandvars(filename) - if os.path.isabs(filename): - path = filename - else: - path = os.path.join(relative_root, filename) + yaml.default_flow_style = True + yaml.indent(mapping=1, sequence=1, offset=0) - with open(path, 'r') as f: - return yaml.load(f) - - yaml.Constructor.add_constructor('!include', include_constructor) - - # Use OrderedDict as default mapping objects + # Dump OrderedDict as regular dictionaries, since we will reload map as + # OrderedDict as well def map_representer(dumper, data): return dumper.represent_dict(data.items()) @@ -3904,8 +3942,13 @@ def init_yaml(yaml, relative_root): yaml.Representer.add_representer(collections.OrderedDict, map_representer) yaml.Constructor.add_constructor(yaml.resolver.DEFAULT_MAPPING_TAG, map_constructor) - # Use block style for multiline strings + # Since strings are immutable, we can memoized the output to deduplicate + # strings. This will make the dumped strings live forever, but that should + # not be a big issue since everything that ends up in a YAML document is + # also living in memory anyway in our use case. + @functools.lru_cache(maxsize=None, typed=True) def str_presenter(dumper, data): + # Use block style for multiline strings style = '|' if '\n' in data else None return dumper.represent_scalar('tag:yaml.org,2002:str', data, style=style) @@ -3938,28 +3981,56 @@ class ReportPreamble(Serializable): self.preamble_version = 0 def check_report_path(path, probe_file): - if probe_file: - # Try to open as gzip + def probe_open_f(open_f, path): try: - with gzip.open(path) as trial_f: - trial_f.read(1) - except OSError as e: - # If we get a gzip-specific error, we assume a plain file - if 'gzip' in str(e): - open_f = open - else: - raise + with open_f(path) as f: + f.read(1) + except Exception: + return False else: - open_f = gzip.open + return True + + # Default to uncompressed file + open_f = open + mime_map = { + 'gzip': gzip.open, + 'xz': lzma.open, + } + + if probe_file: + for f in mime_map.values(): + if probe_open_f(f, path): + open_f = f + break else: - if mimetypes.guess_type(path)[1] == 'gzip': - open_f = gzip.open - else: - open_f = open + guessed_mime = mimetypes.guess_type(path)[1] + open_f = mime_map.get(guessed_mime, open_f) - is_yaml = not (path.endswith('.pickle') or path.endswith('.pickle.gz')) + path = pathlib.Path(path) + compo = path.name.split('.') + # By default, assume YAML unless pickle is explicitly present + is_yaml = 'pickle' not in path.name.split('.') return (open_f, is_yaml) +def disable_gc(f): + """ + Decorator to disable garbage collection during the execution of the + decorated function. + + This can speed-up code that creates a lot of objects in a short amount of + time. + """ + @functools.wraps(f) + def wrapper(*args, **kwargs): + gc.disable() + try: + return f(*args, **kwargs) + # re-enable the garbage collector in all cases + finally: + gc.enable() + + return wrapper + class Report(Serializable): """ Report body containg the result of the top level :class:`MacroStep` . @@ -4020,7 +4091,7 @@ class Report(Serializable): report=self ) with open_f(temp_path, 'wb') as f: - pickle.dump(pickle_data, f) + pickle.dump(pickle_data, f, protocol=4) # Rename the file once we know for sure that writing to the temporary # report completed with success @@ -4041,6 +4112,7 @@ class Report(Serializable): return (path, url) @classmethod + @disable_gc def _do_load(cls, path, steps_path): open_f, is_yaml = check_report_path(path, probe_file=True) @@ -4050,7 +4122,7 @@ class Report(Serializable): excep = import_steps_from_yaml(steps_path) # Then try to import the files as recorded in the report else: - excep = import_files(preamble.src_files) + excep = import_files(src_files) return excep @@ -4207,7 +4279,7 @@ class Report(Serializable): out = MLString() out('Description: {self.description}'.format(self=self)) out('Creation time: {self.creation_time}\n'.format(self=self)) - out(self.result.step.report_results( + out(self.result.step.report( [(IterationCounterStack(), self.result)], service_hub = service_hub, steps_filter = steps_filter, diff --git a/tools/bisector/bisector/xunit2json.py b/tools/bisector/bisector/xunit2json.py deleted file mode 100755 index be08458329a0744644d652f503623c329b187ce0..0000000000000000000000000000000000000000 --- a/tools/bisector/bisector/xunit2json.py +++ /dev/null @@ -1,250 +0,0 @@ -#! /usr/bin/env python3 - -import argparse -import collections -import functools -import itertools -import json -import operator -import sys - -import pandas as pd -import scipy.stats - -def error(msg): - print('Error: ' + str(msg), file=sys.stderr) - -def read_json(path): - try: - return pd.read_json(path, orient='index') - except ValueError as e: - raise FileNotFoundError("Unable to find or parse " + path) from e - -def correlate_df(events_df, threshold=0.0): - # Convert booleans to integers by multplying by 1 - events_df *= 1 - events_df = events_df.transpose().astype(float) - - # Compute the correlation between tests failures - corr_df = events_df.corr() - - # Rename the columns for better readability - corr_df.rename( - {name: i for i, name in enumerate(corr_df.index)}, - axis = 'columns', - inplace = True, - ) - corr_df.rename( - { - name: '({i}) {name}'.format(name=name, i=i) - for i, name in enumerate(corr_df.index) - }, - axis = 'rows', - inplace = True, - ) - - # Replace values below a given threshold by dots to improve readability - corr_df = corr_df.applymap((lambda x: x if abs(x) >= threshold else '.')) - return corr_df - -def compare_df(json_df1, json_df2, alpha, alternative='two-sided', non_significant=True, sort='name'): - # Use an inner-join to only consider tests that are present on both sides - merged_df = pd.merge(json_df1, json_df2, how='inner', right_index=True, - left_index=True, suffixes=('_old','_new')) - - if merged_df.empty: - raise ValueError("Merged dataframe is empty, there was no test in common") - - # Apply the Fisher exact test to all tests failures. - regression_map = collections.defaultdict(dict) - for row in merged_df.itertuples(index=True): - testcase = row[0] - failure_old = row.counters_old['failure'] - failure_new = row.counters_new['failure'] - passed_old = row.counters_old['passed'] - passed_new = row.counters_new['passed'] - - odds_ratio, p_val = scipy.stats.fisher_exact( - [ - # Ignore errors and skipped tests - [failure_old, passed_old], - [failure_new, passed_new], - ], - alternative = alternative - ) - - # Ignore errors and skipped tests - meaningful_total_old = failure_old + passed_old - meaningful_total_new = failure_new + passed_new - # If no meaningful iterations are available, return NaN - if not meaningful_total_old: - failure_old_pc = float('Inf') - else: - failure_old_pc = 100 * failure_old / (failure_old + passed_old) - if not meaningful_total_new: - failure_new_pc = float('Inf') - else: - failure_new_pc = 100 * failure_new / (failure_new + passed_new) - - delta = failure_new_pc - failure_old_pc - - regression_map[testcase] = { - 'failure_old': failure_old_pc, - 'failure_new': failure_new_pc, - 'delta': delta, - 'p-value': p_val, - # If the p-value is smaller than alpha, the regression or - # improvement is statistically significant - 'significant': (p_val <= alpha), - } - - regression_df = pd.DataFrame.from_dict(regression_map, orient='index') - if not non_significant: - regression_df = regression_df.loc[regression_df['significant']] - regression_df.drop('significant', axis=1, inplace=True) - - if sort == 'name': - regression_df.sort_index(inplace=True) - else: - regression_df.sort_values(by=[sort], ascending=False, inplace=True) - - return regression_df - -def _main(argv): - parser = argparse.ArgumentParser(description=""" - Compare tests failure rate using Fisher exact test. - """, - formatter_class=argparse.RawTextHelpFormatter) - - subparsers = parser.add_subparsers(title='subcommands', dest='subcommand') - - compare_parser = subparsers.add_parser('compare', - description="""Compare two JSON files.""" - ) - - analyze_parser = subparsers.add_parser('analyze', - description="""Analyze a given JSON file.""" - ) - - # Options for compare subcommand - compare_parser.add_argument('json', nargs=2, - help="""JSON files to compare.""") - - compare_parser.add_argument('--alpha', type=float, - default=5, - help="""Alpha risk for Fisher exact test in percents.""") - - compare_parser.add_argument('--non-significant', action='store_true', - help="""Also show non-significant changes of failure rate.""") - - compare_parser.add_argument('--sort', metavar='COL', - default='name', - help="""Sort the results by test name ("name") or by a column name.""") - - compare_parser.add_argument('--regression', action='store_true', - help="""Only look for regressions (one-sided Fisher exact test).""") - - # Options for analyze subcommand - analyze_parser.add_argument('json', nargs='+', - help="""JSON files to analyze.""") - - analyze_parser.add_argument('--corr', action='store_true', - help="""Show the correlation matrix between the tests' failures. High - correlation between tests that are always failing or passing are - usually not significant and should be filtered-out with --hide-const. - """) - - analyze_parser.add_argument('--failure-rate-csv', action='store_true', - help="""Show the failure rates in CSV.""") - - analyze_parser.add_argument('--corr-thresh', type=float, - default = 0.0, - help="""Hide correlation factors smaller than this threshold in absolute value.""") - - analyze_parser.add_argument('--hide-const', action='store_true', - help="""Remove columns and rows filled with NaN. This happens when the - variance of the failures is 0, which means correlation cannot be - defined.""") - - args = parser.parse_args(argv) - - # Do not crop any output - pd.set_option('display.width', None) - pd.set_option('display.max_columns', None) - pd.set_option('display.max_colwidth', -1) - pd.set_option('display.colheader_justify', 'center') - - if args.subcommand == 'compare': - json_df1 = read_json(args.json[0]) - json_df2 = read_json(args.json[1]) - - alpha = args.alpha / 100 - show_non_significant = args.non_significant - sort = args.sort - only_regression = args.regression - alternative = 'less' if only_regression else 'two-sided' - - try: - regression_df = compare_df( - json_df1, json_df2, - alpha, alternative, - show_non_significant, sort - ) - except KeyError as e: - error('Unknown column "{}"'.format(e)) - return 1 - - if not regression_df.empty: - print(regression_df.to_string( - formatters = { - 'failure_old':'{:5.2f}%'.format, - 'failure_new':'{:5.2f}%'.format, - 'delta':'{:6.2f}%'.format, - 'p-value':'{:4.2e}'.format, - } - )) - - if args.subcommand == 'analyze': - show_correlation = args.corr - correlation_thresh = args.corr_thresh - hide_const = args.hide_const - show_failure_rate = args.failure_rate_csv - - for json_path in args.json: - json_df = read_json(json_path) - - if show_correlation: - # Filter-out tests that never passed or never failed. They may - # have a 0 variance if they are also never skipped and are - # never erroring, which will result in NaN. Even if they don't - # have zero variance, they would be highly correlated which is - # usually not helpful. - if hide_const: - json_df = json_df[(json_df.failure != 0) & (json_df.passed != 0)] - - # Create a dataframe out of the failure events, one column per - # iteration. - failure_events_df = pd.DataFrame(json_df.events.apply(pd.Series).failure.values.tolist(), index=json_df.index) - # Fill the missing iteration using "false", since the event could not - # have happened on iteration where the test was not executed. - failure_events_df.fillna(value=False, inplace=True) - corr_df = correlate_df(failure_events_df, correlation_thresh) - print(corr_df.to_string(float_format=lambda x: "{:.2f}".format(x))) - - if show_failure_rate: - failure_df= json_df['failure']/(json_df['passed'] + json_df['failure']) - print(failure_df.to_csv(), end='') - -def main(argv=sys.argv[1:]): - try: - return_code = _main(argv=argv) - except FileNotFoundError as e: - error(e) - return_code = 1 - - sys.exit(return_code) - -if __name__ == '__main__': - main() - -# vim :set tabstop=4 shiftwidth=4 expandtab textwidth=80 diff --git a/tools/bisector/setup.py b/tools/bisector/setup.py index f6999e964edee8fcf6032ea11a799c6bf1e92e12..dbc2811f8dc91496fbf315d76f7883bb520c9b13 100755 --- a/tools/bisector/setup.py +++ b/tools/bisector/setup.py @@ -35,7 +35,6 @@ setup( entry_points={ 'console_scripts': [ 'bisector=bisector.bisector:main', - 'xunit2json-analyze=bisector.xunit2json:main' ], }, python_requires='>= 3.5', diff --git a/tools/exekall/exekall/_utils.py b/tools/exekall/exekall/_utils.py index 66c7ee259c03a6ce0cc4d9181d01d7c1bfd1859e..f22c05e9cdfccd7e8f9b6df9d9a7bbed250da570 100644 --- a/tools/exekall/exekall/_utils.py +++ b/tools/exekall/exekall/_utils.py @@ -82,7 +82,7 @@ def get_mro(cls): assert isinstance(cls, type) return inspect.getmro(cls) -def get_name(obj, full_qual=True, qual=True, desugar_cls_meth=False): +def get_name(obj, full_qual=True, qual=True): # full_qual enabled implies qual enabled _qual = qual or full_qual # qual disabled implies full_qual disabled diff --git a/tools/exekall/exekall/customization.py b/tools/exekall/exekall/customization.py index 60d55cae93568887cd87d36b3511575683f4d8e3..abf05ed4c699f196101d96c4a28b9049513d4ace 100644 --- a/tools/exekall/exekall/customization.py +++ b/tools/exekall/exekall/customization.py @@ -60,7 +60,14 @@ class AdaptorBase: return self.hidden_op_set @staticmethod - def register_cli_param(parser): + def register_run_param(parser): + pass + + @staticmethod + def register_compare_param(parser): + pass + + def compare_db_list(self, db_list): pass @staticmethod @@ -70,9 +77,9 @@ class AdaptorBase: def resolve_cls_name(self, goal): return utils.get_class_from_name(goal) - @staticmethod - def load_db(*args, **kwargs): - return ValueDB.from_path(*args, **kwargs) + @classmethod + def reload_db(cls, db, path=None): + return db def finalize_expr(self, expr): pass diff --git a/tools/exekall/exekall/engine.py b/tools/exekall/exekall/engine.py index 512b01afceffe33fb3594a528e5ceae9f6a702f9..af8b2489b385cdfe93973f9f3edee723b387015b 100644 --- a/tools/exekall/exekall/engine.py +++ b/tools/exekall/exekall/engine.py @@ -54,10 +54,11 @@ class ValueDB: # dumping speed. PICKLE_PROTOCOL = 4 - def __init__(self, froz_val_seq_list): + def __init__(self, froz_val_seq_list, adaptor_cls=None): # Avoid storing duplicate FrozenExprVal sharing the same value/excep # UUID self.froz_val_seq_list = self._dedup_froz_val_seq_list(froz_val_seq_list) + self.adaptor_cls = adaptor_cls @classmethod def _dedup_froz_val_seq_list(cls, froz_val_seq_list): @@ -110,12 +111,21 @@ class ValueDB: return cls._froz_val_dfs(froz_val_seq_list, rewrite_graph) @classmethod - def merge(cls, db_seq): - froz_val_seq_list = list(itertools.chain(*( + def merge(cls, db_list): + db_list = list(db_list) + adaptor_cls_set = { + db.adaptor_cls + for db in db_list + } + if len(adaptor_cls_set) != 1: + raise ValueError('Cannot merge ValueDB with different adaptor classes: {}'.format(adaptor_cls_set)) + adaptor_cls = utils.take_first(adaptor_cls_set) + + froz_val_seq_list = list(itertools.chain.from_iterable( db.froz_val_seq_list - for db in db_seq - ))) - return cls(froz_val_seq_list) + for db in db_list + )) + return cls(froz_val_seq_list, adaptor_cls=adaptor_cls) @classmethod def from_path(cls, path, relative_to=None): @@ -133,6 +143,31 @@ class ValueDB: db = pickle.load(f) assert isinstance(db, cls) + # Apply some post-processing on the DB with a known path + cls._call_adaptor_reload(db, path=path) + + return db + + @classmethod + def _reload_serialized(cls, dct): + db = cls.__new__(cls) + db.__dict__ = dct + + # Apply some post-processing on the DB that was just reloaded, with no + # path since we don't even know if that method was invoked on something + # serialized in a file. + cls._call_adaptor_reload(db, path=None) + + return db + + def __reduce_ex__(self, protocol): + return (self._reload_serialized, (self.__dict__,)) + + @staticmethod + def _call_adaptor_reload(db, path): + adaptor_cls = db.adaptor_cls + if adaptor_cls: + db = adaptor_cls.reload_db(db, path=path) return db def to_path(self, path, optimize=True): @@ -246,6 +281,59 @@ class ValueDB: else: return froz_val_set_set + def get_roots(self, flatten=True): + froz_val_set_set = { + frozenset(froz_val_seq) + for froz_val_seq in self.froz_val_seq_list + } + if flatten: + return set(utils.flatten_seq(froz_val_set_set)) + else: + return froz_val_set_set + + def prune_by_predicate(self, predicate): + def prune(froz_val): + if isinstance(froz_val, PrunedFrozVal): + return froz_val + elif predicate(froz_val): + return PrunedFrozVal(froz_val) + else: + # Edit the param_map in-place, so we keep it potentially shared + # if possible. + for param, param_froz_val in list(froz_val.param_map.items()): + froz_val.param_map[param] = prune(param_froz_val) + + return froz_val + + def make_froz_val_seq(froz_val_seq): + froz_val_list = [ + prune(froz_val) + for froz_val in froz_val_seq + # Just remove the root PrunedFrozVal, since they are useless at + # this level (i.e. nothing depends on them) + if not predicate(froz_val) + ] + + # All param_map will be the same in the list by construction + try: + param_map = froz_val_list[0].param_map + except IndexError: + param_map = {} + + return FrozenExprValSeq( + froz_val_list=froz_val_list, + param_map=param_map, + ) + + return self.__class__( + froz_val_seq_list=[ + make_froz_val_seq(froz_val_seq) + # That will keep proper inter-object references as in the + # original graph of objects + for froz_val_seq in copy.deepcopy(self.froz_val_seq_list) + ] + ) + def get_all(self, **kwargs): return self.get_by_predicate(lambda froz_val: True, **kwargs) @@ -265,6 +353,7 @@ class ValueDB: return self.get_by_predicate(predicate, **kwargs) + class ScriptValueDB: def __init__(self, db, var_name='db'): self.db = db @@ -549,12 +638,15 @@ class ExpressionBase: return self.get_all_script([self], *args, **kwargs) @classmethod - def get_all_script(cls, expr_list, prefix='value', db_path='VALUE_DB.pickle.xz', db_relative_to=None, db_loader=None, db=None): + def get_all_script(cls, expr_list, prefix='value', db_path='VALUE_DB.pickle.xz', db_relative_to=None, db=None, adaptor_cls=None): assert expr_list if db is None: froz_val_seq_list = FrozenExprValSeq.from_expr_list(expr_list) - script_db = ScriptValueDB(ValueDB(froz_val_seq_list)) + script_db = ScriptValueDB(ValueDB( + froz_val_seq_list, + adaptor_cls=adaptor_cls, + )) else: script_db = ScriptValueDB(db) @@ -607,15 +699,6 @@ class ExpressionBase: result_name_map[expr] = result_name - # Get the name of the customized db_loader - if db_loader is None: - db_loader_name = '{cls_name}.from_path'.format( - cls_name=utils.get_name(ValueDB, full_qual=True), - ) - else: - module_name_set.add(inspect.getmodule(db_loader).__name__) - db_loader_name = utils.get_name(db_loader, full_qual=True) - # Add all the imports header = ( '#! /usr/bin/env python3\n\n' + @@ -649,9 +732,9 @@ class ExpressionBase: else: db_relative_to = '' - header += '{db} = {db_loader_name}({path}{db_relative_to})\n'.format( + header += '{db} = {db_loader}({path}{db_relative_to})\n'.format( db = script_db.var_name, - db_loader_name = db_loader_name, + db_loader = utils.get_name(ValueDB.from_path, full_qual=True), path = repr(str(db_path)), db_relative_to = db_relative_to ) @@ -1456,6 +1539,9 @@ class Expression(ExpressionBase): class AnnotationError(Exception): pass +class PartialAnnotationError(AnnotationError): + pass + class ForcedParamType: pass @@ -1730,6 +1816,7 @@ class Operator: sig = self.signature first_param = utils.take_first(sig.parameters) annotation_map = utils.resolve_annotations(self.annotations, self.callable_globals) + pristine_annotation_map = copy.copy(annotation_map) extra_ignored_param = set() # If it is a class @@ -1771,9 +1858,16 @@ class Operator: param not in extra_ignored_param and param not in self.ignored_param ): - raise AnnotationError('Missing annotation for "{param}" parameters of operator "{op}"'.format( + # If some parameters are annotated but not all, we raise a + # slightly different exception to allow better reporting + if pristine_annotation_map: + excep_cls = PartialAnnotationError + else: + excep_cls = AnnotationError + + raise excep_cls('Missing annotation for "{param}" parameters of operator "{op}"'.format( param = param, - op = self.callable_, + op = self.name, )) # Iterate over keys and values of "mapping" in the same order as "keys" @@ -2071,17 +2165,13 @@ class ExprValBase(collections.abc.Mapping): return id(self) def __getitem__(self, k): - if k == 'return': - return self.value - else: - return self.param_map[k] + return self.param_map[k] def __len__(self): - # account for 'return' - return len(self.param_map) + 1 + return len(self.param_map) def __iter__(self): - return itertools.chain(self.param_map.keys(), ['return']) + return self.param_map.keys() class FrozenExprVal(ExprValBase): def __init__(self, @@ -2094,6 +2184,11 @@ class FrozenExprVal(ExprValBase): self.recorded_id_map = recorded_id_map super().__init__(param_map=param_map, value=value, excep=excep) + if self.excep is not NoValue: + self.excep_tb = utils.format_exception(self.excep) + else: + self.excep_tb = None + @property def type_names(self): return [ @@ -2153,10 +2248,15 @@ class FrozenExprVal(ExprValBase): return froz_val @staticmethod + # Since tuples are immutable, reuse the same tuple by memoizing the + # function. That allows more compact serialized representation in both YAML + # and Pickle. + @utils.once def _make_id_key(**kwargs): return tuple(sorted(kwargs.items())) def get_id(self, full_qual=True, qual=True, with_tags=True): + full_qual = full_qual and qual key = self._make_id_key( full_qual=full_qual, qual=qual, @@ -2164,6 +2264,18 @@ class FrozenExprVal(ExprValBase): ) return self.recorded_id_map[key] +class PrunedFrozVal(FrozenExprVal): + def __init__(self, froz_val): + super().__init__( + param_map={}, + value=NoValue, + excep=NoValue, + uuid=froz_val.uuid, + callable_qualname=froz_val.callable_qualname, + callable_name=froz_val.callable_name, + recorded_id_map=copy.copy(froz_val.recorded_id_map), + ) + class FrozenExprValSeq(collections.abc.Sequence): def __init__(self, froz_val_list, param_map): self.froz_val_list = froz_val_list diff --git a/tools/exekall/exekall/main.py b/tools/exekall/exekall/main.py index 0a500fc45af710bcb9bbdf14311d692fd5e2666e..604aa8c8af29668b33af8fb10aabd1d8b202553d 100755 --- a/tools/exekall/exekall/main.py +++ b/tools/exekall/exekall/main.py @@ -339,6 +339,16 @@ should be treated as read-only. merge_parser.add_argument('--copy', action='store_true', help="""Force copying files, instead of using hardlinks.""") + + compare_parser = subparsers.add_parser('compare', + description=""" +Compare two DBs produced by exekall run. + """, + formatter_class=argparse.RawTextHelpFormatter) + + compare_parser.add_argument('db', nargs=2, + help="""DBs created using exekall run to compare.""") + # Avoid showing help message on the incomplete parser. Instead, we carry on # and the help will be displayed after the parser customization of run # subcommand has a chance to take place. @@ -369,7 +379,7 @@ should be treated as read-only. # Some subcommands need not parser customization, in which case we more # strictly parse the command line - if args.subcommand not in ['run']: + if args.subcommand not in ('run', 'compare'): parser.parse_args(argv) if args.subcommand == 'run': @@ -383,6 +393,43 @@ should be treated as read-only. use_hardlink=(not args.copy), ) + elif args.subcommand == 'compare': + return do_compare( + parser=parser, + compare_parser=compare_parser, + argv=argv, + db_path_list=args.db, + ) + +def do_compare(parser, compare_parser, argv, db_path_list): + assert len(db_path_list) == 2 + db_list = [ + engine.ValueDB.from_path(path) + for path in db_path_list + ] + + adaptor_cls_set = { + db.adaptor_cls + for db in db_list + } + if len(adaptor_cls_set) != 1: + raise ValueError('Cannot compare DBs that were built using a different adaptor: {}'.format(adaptor_cls_set)) + adaptor_cls = utils.take_first(adaptor_cls_set) + + # Add all the CLI arguments of the adaptor before reparsing the + # command line. + adaptor_cls.register_compare_param(compare_parser) + + # Reparse the command line after the adaptor had a chance to add its own + # arguments. + args = parser.parse_args(argv) + + # Create the adaptor with the args, so it can use it to implement + # comparison + adaptor = adaptor_cls(args) + + return adaptor.compare_db_list(db_list) + def do_merge(artifact_dirs, output_dir, use_hardlink=True, output_exist=False): output_dir = pathlib.Path(output_dir) @@ -399,13 +446,26 @@ def do_merge(artifact_dirs, output_dir, use_hardlink=True, output_exist=False): os.makedirs(str(output_dir), exist_ok=output_exist) merged_db_path = output_dir/DB_FILENAME + (output_dir/'BY_UUID').mkdir(exist_ok=True) testsession_uuid_list = [] for artifact_dir in artifact_dirs: with (artifact_dir/'UUID').open(encoding='utf-8') as f: testsession_uuid = f.read().strip() testsession_uuid_list.append(testsession_uuid) + src_by_uuid = artifact_dir/'BY_UUID' + for uuid_symlink in src_by_uuid.iterdir(): + target = uuid_symlink.resolve() + target = pathlib.Path('..', target.relative_to(artifact_dir.resolve())) + (output_dir/'BY_UUID'/uuid_symlink.name).symlink_to(target) + + link_base_path = pathlib.Path('ORIGIN', testsession_uuid) + shutil.copytree( + str(src_by_uuid), + str(output_dir/link_base_path/'BY_UUID'), + symlinks=True, + ) # Copy all the files recursively for dirpath, dirnames, filenames in os.walk(str(artifact_dir)): @@ -470,6 +530,9 @@ def do_merge(artifact_dirs, output_dir, use_hardlink=True, output_exist=False): with (output_dir/'UUID').open('wt') as f: f.write(combined_uuid+'\n') + if output_exist: + db_path_list.append(merged_db_path) + merged_db = engine.ValueDB.merge( engine.ValueDB.from_path(path) for path in db_path_list @@ -490,7 +553,7 @@ def do_run(args, parser, run_parser, argv): raise RuntimeError('Adaptor "{}" cannot be found'.format(adaptor_name)) # Add all the CLI arguments of the adaptor before reparsing the # command line. - adaptor_cls.register_cli_param(run_parser) + adaptor_cls.register_run_param(run_parser) # Reparse the command line after the adaptor had a chance to add its own # arguments. @@ -577,7 +640,7 @@ def do_run(args, parser, run_parser, argv): if load_db_path_list: db_list = [] for db_path in load_db_path_list: - db = adaptor.load_db(db_path) + db = engine.ValueDB.from_path(db_path) op_set.update( load_from_db(db, adaptor, non_reusable_type_set, load_db_pattern_list, load_db_uuid_list, load_db_uuid_args @@ -717,6 +780,7 @@ def do_run(args, parser, run_parser, argv): testsession_uuid=testsession_uuid, hidden_callable_set=hidden_callable_set, only_template_scripts=only_template_scripts, + adaptor_cls=adaptor_cls, verbose=verbose, ) @@ -732,13 +796,13 @@ def do_run(args, parser, run_parser, argv): return exec_ret_code def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, - hidden_callable_set, only_template_scripts, verbose): + hidden_callable_set, only_template_scripts, adaptor_cls, verbose): if not only_template_scripts: with (artifact_dir/'UUID').open('wt') as f: f.write(testsession_uuid+'\n') - db_loader = adaptor.load_db + (artifact_dir/'BY_UUID').mkdir() out('\nArtifacts dir: {}\n'.format(artifact_dir)) @@ -798,7 +862,6 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, prefix = 'testcase', db_path = os.path.join('..', DB_FILENAME), db_relative_to = '__file__', - db_loader=db_loader )[1]+'\n', ) @@ -916,15 +979,14 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, prefix = 'testcase', db_path = os.path.join('..', '..', DB_FILENAME), db_relative_to = '__file__', - db_loader=db_loader )[1]+'\n', ) def format_uuid(expr_val_list): - uuid_list = sorted( + uuid_list = sorted({ expr_val.uuid for expr_val in expr_val_list - ) + }) return '\n'.join(uuid_list) def write_uuid(path, *args): @@ -935,10 +997,21 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, write_uuid(expr_artifact_dir/'REUSED_VALUES_UUID', reused_expr_val_set) write_uuid(expr_artifact_dir/'COMPUTED_VALUES_UUID', computed_expr_val_set) + # From there, use a relative path for symlinks + expr_artifact_dir = pathlib.Path('..', expr_artifact_dir.relative_to(artifact_dir)) + computed_uuid_set = { + expr_val.uuid + for expr_val in computed_expr_val_set + } + computed_uuid_set.add(expr.uuid) + for uuid_ in computed_uuid_set: + (artifact_dir/'BY_UUID'/uuid_).symlink_to(expr_artifact_dir) + db = engine.ValueDB( engine.FrozenExprValSeq.from_expr_list( expr_list, hidden_callable_set=hidden_callable_set - ) + ), + adaptor_cls=adaptor_cls, ) db_path = artifact_dir/DB_FILENAME @@ -961,7 +1034,7 @@ def exec_expr_list(expr_list, adaptor, artifact_dir, testsession_uuid, db_path=db_path.relative_to(artifact_dir), db_relative_to='__file__', db=db, - db_loader=db_loader, + adaptor_cls=adaptor_cls, ) with script_path.open('wt', encoding='utf-8') as f: diff --git a/tools/exekall/exekall/utils.py b/tools/exekall/exekall/utils.py index caedc03405fe9e6d7cd47ea40c73f72f2706fd63..f26971e36a8226e66bb3439bc3936cec92bf2dfe 100644 --- a/tools/exekall/exekall/utils.py +++ b/tools/exekall/exekall/utils.py @@ -33,18 +33,30 @@ def get_callable_set(module_set, verbose=False): module.__package__.split('.', 1)[0] for module in module_set } callable_set = set() + visited_obj_set = set() for module in get_recursive_module_set(module_set, package_set): - callable_set.update(_get_callable_set(module, verbose=verbose)) + callable_set.update(_get_callable_set( + module, + visited_obj_set, + verbose=verbose, + )) return callable_set -def _get_callable_set(module, verbose): +def _get_callable_set(module, visited_obj_set, verbose): + log_f = info if verbose else debug callable_pool = set() + for name, obj in vars(module).items(): # skip internal classes that may end up being exposed as a global if inspect.getmodule(obj) is engine: continue + if id(obj) in visited_obj_set: + continue + else: + visited_obj_set.add(id(obj)) + # If it is a class, get the list of methods if isinstance(obj, type): callable_list = list(dict(inspect.getmembers(obj, predicate=callable)).values()) @@ -57,6 +69,14 @@ def _get_callable_set(module, verbose): for callable_ in callable_list: try: param_list, return_type = engine.Operator(callable_).get_prototype() + # If the callable is partially annotated, warn about it since it is + # likely to be a mistake. + except engine.PartialAnnotationError as e: + log_f('Partially-annotated callable will not be used: {e}'.format( + callable=get_name(callable_), + e=e, + )) + continue # If something goes wrong, that means it is not properly annotated # so we just ignore it except (AttributeError, ValueError, KeyError, engine.AnnotationError): @@ -66,7 +86,6 @@ def _get_callable_set(module, verbose): # return a abstract base class instance, since that would not work # anyway. if inspect.isabstract(return_type): - log_f = info if verbose else debug log_f('Instances of {} will not be created since it has non-implemented abstract methods'.format( get_name(return_type, full_qual=True) ))