From a287e37c005a58d026599ef7af53c6c083984609 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Thu, 7 Sep 2023 11:02:08 +0100 Subject: [PATCH 1/6] tools/exekall: Restrict selected callables FIX Only select callables that are defined in the modules being scanned (passed on the command line and any of their (indirect) children). If a callable is defined in another module than the one being currently scanned, it will be ignored. Since all the children of user-passed packages are scanned, this has little effect, but allows more precise selection when 2 packages or more are selected in case they depend on each other. --- tools/exekall/exekall/utils.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tools/exekall/exekall/utils.py b/tools/exekall/exekall/utils.py index 655bbe794..5ae97cd3d 100644 --- a/tools/exekall/exekall/utils.py +++ b/tools/exekall/exekall/utils.py @@ -52,9 +52,9 @@ def get_callable_set(module_set, verbose=False): visited_module_set.add(module) callable_set_ = _get_callable_set( - module, - visited_obj_set, - package_set=package_set, + namespace=module, + module=module, + visited_obj_set=visited_obj_set, verbose=verbose, ) @@ -100,9 +100,10 @@ def _get_members(*args, **kwargs): warnings.simplefilter(action='ignore') return inspect.getmembers(*args, **kwargs) -def _get_callable_set(namespace, visited_obj_set, package_set, verbose): +def _get_callable_set(namespace, module, visited_obj_set, verbose): """ :param namespace: Module or class + :param module: Module the namespace was defined in, or ``None`` to be ignored. """ log_f = info if verbose else debug callable_pool = set() @@ -124,13 +125,13 @@ def _get_callable_set(namespace, visited_obj_set, package_set, verbose): attributes.append(namespace) def select(attr): - module = inspect.getmodule(attr) + _module = inspect.getmodule(attr) return ( # Module of builtins is None - module is None or + _module is None or # skip internal classes that may end up being exposed as a global - module is not engine and - get_package(module) in package_set + _module is not engine and + (True if module is None else _module is module) ) visited_obj_set.update(attributes) @@ -158,7 +159,16 @@ def _get_callable_set(namespace, visited_obj_set, package_set, verbose): ) ): callable_pool.update( - _get_callable_set(callable_, visited_obj_set, package_set, verbose) + _get_callable_set( + namespace=callable_, + visited_obj_set=visited_obj_set, + verbose=verbose, + # We want to get all the attributes in classes, regardless + # on what module owns them. For example, we want to select + # a method inherited from a base class even if that base + # class and the method definition lives somewhere else. + module=None, + ) ) # Functions defined in a class are methods, and have to be wrapped so -- GitLab From b07e8e4681d61aa9f7ac564dd4aa5d40e72fd3de Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 5 Sep 2023 17:59:19 +0100 Subject: [PATCH 2/6] tools/exekall: Handle namespace packages FEATURE If a valid path to a namespace package is provided, also import that package as a Python name. This ensures that even if exekall is ran in the repository of one part of that namespace package such that the path and the Python name are equal, all the other parts will be found as well. Inference of the top-level package is not possible in general for namespace packages as there is no __init__.py to be found in a parent folder. However, when the module is being imported by name, we do know what is the toplevel package so we can preserve that information when infering the name of submodules. Ensure namespace packages are added to sys.modules and as attribute on the parent module. --- tools/exekall/exekall/_utils.py | 225 ++++++++++++++++++++------------ 1 file changed, 140 insertions(+), 85 deletions(-) diff --git a/tools/exekall/exekall/_utils.py b/tools/exekall/exekall/_utils.py index dc71bc93e..70417d1dc 100644 --- a/tools/exekall/exekall/_utils.py +++ b/tools/exekall/exekall/_utils.py @@ -40,6 +40,8 @@ import argparse import time import datetime import copy +import warnings +import os.path DB_FILENAME = 'VALUE_DB.pickle.xz' @@ -518,31 +520,53 @@ def error(msg): EXEKALL_LOGGER.error(msg) -def infer_mod_name(python_src): +def infer_mod_name(python_src, package_roots=None, excep_handler=None): """ Compute the module name of a Python source file by inferring its top-level package """ - python_src = pathlib.Path(python_src) - module_path = None + python_src = pathlib.Path(python_src).absolute() + module_path1 = None + module_path2 = None # First look for the outermost package we find in the parent directories. # If we were supplied a path, it will not try to go past its highest folder. for folder in reversed(python_src.parents): if pathlib.Path(folder, '__init__.py').exists(): package_root_parent = folder.parents[0] - module_path = python_src.relative_to(package_root_parent) + module_path1 = python_src.relative_to(package_root_parent) break # If no package was found, we try to find it through sys.path in case it is # only using namespace packages else: for package_root_parent in sys.path: try: - module_path = python_src.relative_to(package_root_parent) + module_path1 = python_src.relative_to(package_root_parent) break except ValueError: continue + if package_roots: + for path in package_roots: + assert path + path = pathlib.Path(path).absolute() + package_root_parent = path.parents[0] + try: + module_path2 = python_src.relative_to(package_root_parent) + except ValueError: + continue + else: + break + else: + raise ValueError('Could not find {python_src} in any of the package roots: {package_roots}') + + # Pick the longest path of both + paths = [module_path1, module_path2] + module_path, *_ = sorted( + filter(bool, paths), + key=lambda path: len(path.parents), reverse=True + ) + # If we found the top-level package if module_path is not None: module_parents = list(module_path.parents) @@ -550,10 +574,12 @@ def infer_mod_name(python_src): # Import all parent package_names before we import the module for package_name in reversed(module_parents[:-1]): - package_name = import_file( + import_file( pathlib.Path(package_root_parent, package_name), module_name='.'.join(package_name.parts), + package_roots=package_roots, is_package=True, + excep_handler=excep_handler, ) module_dotted_path = list(module_parents[0].parts) + [module_basename] @@ -622,16 +648,37 @@ def import_modules(paths_or_names, excep_handler=None): def import_it(path_or_name): # Recursively import all modules when passed folders if path_or_name.is_dir(): - yield from import_folder(path_or_name, excep_handler=excep_handler) + modules = list(import_folder(path_or_name, package_roots=[path_or_name], excep_handler=excep_handler)) + yield from modules + + # Import by name the top-level package corresponding to the imports + # we just did, and import that top-level package by name if it is a + # namespace package. This will ensure we find all the legs of the + # namespace package, wherever they are + if modules: + names = [m.__name__ for m in modules] + prefix = os.path.commonprefix(names) + toplevel = '.'.join(prefix.split('.')[:-1]) + + try: + toplevel = importlib.import_module(toplevel) + except Exception: + pass + else: + is_namespace = len(toplevel.__path__ or []) + if is_namespace: + # Best-effort attempt, we ignore exceptions here as a + # namespace package is open to the world and we don't + # want to prevent loading part of it simply because + # another leg is broken. + yield from import_name_recursively(toplevel.__name__, excep_handler=lambda *args, **kwargs: None) + # If passed a file, a symlink or something like that elif path_or_name.exists(): - try: - yield import_file(path_or_name) - except Exception as e: - if excep_handler: - return excep_handler(str(path_or_name), e) - else: - raise + mod = import_file(path_or_name, excep_handler=excep_handler) + # Could be an exception handler return value + if inspect.ismodule(mod): + yield mod # Otherwise, assume it is just a module name else: yield from import_name_recursively(path_or_name, excep_handler=excep_handler) @@ -660,6 +707,7 @@ def import_name_recursively(name, excep_handler=None): return excep_handler(name_str, e) else: raise + try: paths = mod.__path__ # This is a plain module @@ -667,26 +715,24 @@ def import_name_recursively(name, excep_handler=None): yield mod # This is a package, so we import all the submodules recursively else: + root, *_ = str(name).split('.', 1) + package_roots = importlib.import_module(root).__path__ for path in paths: - yield from import_folder(pathlib.Path(path), excep_handler=excep_handler) + yield from import_folder(pathlib.Path(path), package_roots=package_roots, excep_handler=excep_handler) -def import_folder(path, excep_handler=None): +def import_folder(path, package_roots=None, excep_handler=None): """ Import all modules contained in the given folder, recurisvely. """ for python_src in glob.iglob(str(path / '**' / '*.py'), recursive=True): - try: - yield import_file(python_src) - except Exception as e: - if excep_handler: - excep_handler(python_src, e) - continue - else: - raise + mod = import_file(python_src, package_roots=package_roots, excep_handler=excep_handler) + # Could be an exception handler return value + if inspect.ismodule(mod): + yield mod -def import_file(python_src, module_name=None, is_package=False): +def import_file(python_src, *args, excep_handler=None, **kwargs): """ Import a module. @@ -697,11 +743,24 @@ def import_file(python_src, module_name=None, is_package=False): name is inferred using :func:`infer_mod_name` :type module_name: str + :param package_roots: Paths to the root of the package, used by + :func:`infer_mod_name`. A namespace package can have multiple roots. + :type package_roots: list(str) + :param is_package: ``True`` if the module is a package. If a folder or ``__init__.py`` is passed, this is forcefully set to ``True``. :type is_package: bool """ + try: + return _import_file(python_src, *args, **kwargs) + except Exception as e: + if excep_handler: + return excep_handler(python_src, e) + else: + raise + +def _import_file(python_src, module_name=None, is_package=False, package_roots=None, excep_handler=None): python_src = pathlib.Path(python_src).resolve() # Directly importing __init__.py does not really make much sense and may @@ -710,14 +769,16 @@ def import_file(python_src, module_name=None, is_package=False): return import_file( python_src=python_src.parent, module_name=module_name, - is_package=True + package_roots=package_roots, + is_package=True, + excep_handler=excep_handler, ) if python_src.is_dir(): is_package = True if module_name is None: - module_name = infer_mod_name(python_src) + module_name = infer_mod_name(python_src, package_roots=package_roots, excep_handler=excep_handler) # Check if the module has already been imported if module_name in sys.modules: @@ -727,21 +788,22 @@ def import_file(python_src, module_name=None, is_package=False): if is_package: # Signify that it is a package to # importlib.util.spec_from_file_location - submodule_search_locations = [str(python_src)] init_py = pathlib.Path(python_src, '__init__.py') # __init__.py does not exists for namespace packages if init_py.exists(): python_src = init_py else: is_namespace_package = True - else: - submodule_search_locations = None - # Python >= 3.5 style - if hasattr(importlib.util, 'module_from_spec'): - # We manually build a ModuleSpec for namespace packages, since - # spec_from_file_location apparently does not handle them - if is_namespace_package: + # We manually build a ModuleSpec for namespace packages, since + # spec_from_file_location apparently does not handle them + if is_namespace_package: + # If we get the module spec this way, we will automatically get the + # full submodule_search_location so that the namespace package + # contains all its legs + spec = importlib.util.find_spec(module_name) + + if spec is None: spec = importlib.machinery.ModuleSpec( name=module_name, # loader is None for namespace packages @@ -749,59 +811,52 @@ def import_file(python_src, module_name=None, is_package=False): is_package=True, ) # Set __path__ for namespace packages - spec.submodule_search_locations = submodule_search_locations - else: - spec = importlib.util.spec_from_file_location( - module_name, - str(python_src), - submodule_search_locations=submodule_search_locations, - ) - if spec is None: - raise ModuleNotFoundError( - 'Could not find module "{module}" at {path}'.format( - module=module_name, - path=python_src - ), - name=module_name, - path=python_src, - ) - - module = importlib.util.module_from_spec(spec) - if not is_namespace_package: - try: - # Register module before executing it so relative imports will - # work - sys.modules[module_name] = module - # Nothing to execute in a namespace package - spec.loader.exec_module(module) - # If the module cannot be imported cleanly regardless of the reason, - # make sure we remove it from sys.modules since it's broken. Future - # attempt to import it should raise again, rather than returning the - # broken module - except BaseException: - with contextlib.suppress(KeyError): - del sys.modules[module_name] - raise - else: - - # Set the attribute on the parent package, so that this works: - # - # import foo.bar - # print(foo.bar) - try: - parent_name, last = module_name.rsplit('.', 1) - except ValueError: - pass - else: - parent = sys.modules[parent_name] - setattr(parent, last, module) - + spec.submodule_search_locations = [str(python_src)] + else: + spec = importlib.util.spec_from_file_location( + module_name, + str(python_src), + ) + if spec is None: + raise ModuleNotFoundError( + 'Could not find module "{module}" at {path}'.format( + module=module_name, + path=python_src + ), + name=module_name, + path=python_src, + ) - # Python <= v3.4 style + module = importlib.util.module_from_spec(spec) + try: + # Register module before executing it so relative imports will + # work + sys.modules[module_name] = module + with warnings.catch_warnings(): + warnings.simplefilter(action='ignore') + spec.loader.exec_module(module) + # If the module cannot be imported cleanly regardless of the reason, + # make sure we remove it from sys.modules since it's broken. Future + # attempt to import it should raise again, rather than returning the + # broken module + except BaseException: + with contextlib.suppress(KeyError): + del sys.modules[module_name] + raise else: - module = importlib.machinery.SourceFileLoader( - module_name, str(python_src)).load_module() + + # Set the attribute on the parent package, so that this works: + # + # import foo.bar + # print(foo.bar) + try: + parent_name, last = module_name.rsplit('.', 1) + except ValueError: + pass + else: + parent = sys.modules[parent_name] + setattr(parent, last, module) sys.modules[module_name] = module importlib.invalidate_caches() -- GitLab From 59e23a877b8120e3bdda325b5d36525e041719cd Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Wed, 6 Sep 2023 12:03:45 +0100 Subject: [PATCH 3/6] tools/exekall: Add run --dependency option FEATURE Add a --dependency option that behaves the same as passing a module as a positional argument, except that it will be ignored when computing the list of allowed root operators. This allows using a package to satisfy dependencies in expressions while controlling what expressions are selected using the modules passed as positional arguments. --- doc/man1/exekall.1 | 9 +++++++-- tools/exekall/exekall/main.py | 16 ++++++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/doc/man1/exekall.1 b/doc/man1/exekall.1 index bb8c7a437..d8e99dd31 100644 --- a/doc/man1/exekall.1 +++ b/doc/man1/exekall.1 @@ -67,8 +67,8 @@ subcommands: .sp .nf .ft C -usage: exekall run [\-h] [\-s ID_PATTERN] [\-\-list] [\-n N] [\-\-load\-db LOAD_DB] - [\-\-load\-type TYPE_PATTERN] +usage: exekall run [\-h] [\-\-dependency DEPENDENCY] [\-s ID_PATTERN] [\-\-list] + [\-n N] [\-\-load\-db LOAD_DB] [\-\-load\-type TYPE_PATTERN] [\-\-replay REPLAY | \-\-load\-uuid LOAD_UUID] [\-\-artifact\-dir ARTIFACT_DIR | \-\-artifact\-root ARTIFACT_ROOT] [\-\-no\-save\-value\-db] [\-\-verbose] [\-\-pdb] @@ -98,6 +98,11 @@ positional arguments: optional arguments: \-h, \-\-help show this help message and exit + \-\-dependency DEPENDENCY + Same as specifying a module in PYTHON_MODULES but will only be used to + build an expression if it would have been selected without that module + listed. Operators defined in modules listed here will not be used as + the root operator in any expression. \-s ID_PATTERN, \-\-select ID_PATTERN Only run the expressions with an ID matching any of the supplied pattern. A pattern starting with \(dq!\(dq can be used to exclude IDs diff --git a/tools/exekall/exekall/main.py b/tools/exekall/exekall/main.py index 3c4401538..a3892e99a 100755 --- a/tools/exekall/exekall/main.py +++ b/tools/exekall/exekall/main.py @@ -232,6 +232,11 @@ please run ``exekall run YOUR_SOURCES_OR_MODULES --help``. metavar='PYTHON_MODULES', help="""Python modules files or module names. If passed a folder, all contained files recursively are selected. By default, the current directory is selected.""") + add_argument(run_parser, '--dependency', action='append', + default=[], + help="""Same as specifying a module in PYTHON_MODULES but will only be used to build an expression if it would have been selected without that module listed. Operators defined in modules listed here will not be used as the root operator in any expression.""", + ) + add_argument(run_parser, '-s', '--select', action='append', metavar='ID_PATTERN', default=[], @@ -661,8 +666,10 @@ def do_run(args, parser, run_parser, argv): if mod.endswith('.py'): saved_exceps.append((mod, e)) + python_files = list(itertools.chain(args.python_files, args.dependency)) + module_set = set() - for path in args.python_files: + for path in python_files: try: imported = utils.import_modules([path], excep_handler=best_effort) # This might fail, since some adaptor options may introduce "fake" @@ -727,7 +734,8 @@ def do_run(args, parser, run_parser, argv): )) exit_after_import = True - module_set = utils.import_modules(args.python_files, excep_handler=excep_handler) + root_module_set = set(utils.import_modules(args.python_files, excep_handler=excep_handler)) + module_set = root_module_set | set(utils.import_modules(args.dependency, excep_handler=excep_handler)) if exit_after_import: return import_error_code @@ -915,12 +923,12 @@ def do_run(args, parser, run_parser, argv): # Only keep the Expression where the outermost (root) operator is # defined in one of the files that were explicitly specified on the # command line. - inspect.getmodule(op.callable_) in module_set or + inspect.getmodule(op.callable_) in root_module_set or # Also include all methods (including the inherited ones) of # classes that are defined in the files explicitly specified ( isinstance(op.callable_, engine.UnboundMethod) and - op.callable_.cls.__module__ in map(attrgetter('__name__'), module_set) + op.callable_.cls.__module__ in map(attrgetter('__name__'), root_module_set) ) ) ]) -- GitLab From acde32be14442f8a9e1919213bcd58e586ec1b7f Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 5 Sep 2023 19:07:27 +0100 Subject: [PATCH 4/6] lisa.conf: Fix an exception message FIX --- lisa/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lisa/conf.py b/lisa/conf.py index 882ed2b79..56bb6067d 100644 --- a/lisa/conf.py +++ b/lisa/conf.py @@ -999,7 +999,7 @@ class MultiSrcConfABC(Serializable, abc.ABC): ) for cls_ in offending ): - raise RuntimeError(f'Class {cls.__qualname__} cannot reuse top level key "{format_keys(toplevel_keys)}" as it is already used by {", ".join(offending)}') + raise RuntimeError(f'Class {cls.__qualname__} cannot reuse top level key "{format_keys(toplevel_keys)}" as it is already used by {", ".join(map(str, offending))}') else: cls._REGISTERED_TOPLEVEL_KEYS[toplevel_keys] = cls -- GitLab From 06087a74404c1d7ef3d22d1b34c81e5835ab697f Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Wed, 6 Sep 2023 14:37:06 +0100 Subject: [PATCH 5/6] lisa.utils: Hide warnings in import_all_submodules() FIX Hide warnings when importing modules in import_all_submodules(), to avoid seeing deprecation warnings that would be hard to avoid and not terribly useful in that context. --- lisa/utils.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lisa/utils.py b/lisa/utils.py index 48c9c39be..7fc612ffd 100644 --- a/lisa/utils.py +++ b/lisa/utils.py @@ -700,15 +700,18 @@ def _import_all_submodules(pkg_name, pkg_path, best_effort=False): for _, module_name, _ in ( pkgutil.walk_packages(pkg_path, prefix=pkg_name + '.') ): - try: - module = importlib.import_module(module_name) - except ImportError: - if best_effort: - pass + try: + # Silence warnings if we hit some deprecated modules + with warnings.catch_warnings(): + warnings.simplefilter(action='ignore') + module = importlib.import_module(module_name) + except ImportError: + if best_effort: + pass + else: + raise else: - raise - else: - modules.append(module) + modules.append(module) return modules -- GitLab From 4a254f0dada74cb46d242fc961241bd1f6260cec Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 5 Sep 2023 16:56:33 +0100 Subject: [PATCH 6/6] lisa._assets.kmodules.lisa: Make introspect_header.py a no-op when imported FIX Since introspect_header.py is stored inside a Python package, some tools may try to import it. Ensure it will not do anything, especially not try to parse a command line and fail. --- lisa/_assets/kmodules/lisa/introspect_header.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lisa/_assets/kmodules/lisa/introspect_header.py b/lisa/_assets/kmodules/lisa/introspect_header.py index 3d5be1f42..0152e979f 100755 --- a/lisa/_assets/kmodules/lisa/introspect_header.py +++ b/lisa/_assets/kmodules/lisa/introspect_header.py @@ -134,4 +134,5 @@ def main(): -main() +if __name__ == '__main__': + main() -- GitLab