From 4f8c1da875d5a278d47fdcf9e93b37b7ea9a14cb Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Tue, 9 Jul 2024 17:27:08 +0100
Subject: [PATCH 1/4] lisa.target: Resolve known_hosts path when strict_host_check=True

FIX

When strict_host_check=True, resolve the path to the known_hosts SSH
file so that the target object can be reused without trouble in a
different user namespace. Otherwise, the home folder would resolve to
another path, making it impossible to reload the file content and
ultimately causing the check to fail.
---
 lisa/target.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/lisa/target.py b/lisa/target.py
index 69efd5778..362cba87b 100644
--- a/lisa/target.py
+++ b/lisa/target.py
@@ -817,12 +817,23 @@ class Target(Loggable, HideExekallID, ExekallTaggable, Configurable):
                 adb_as_root=(username == 'root'),
             )
         elif kind == 'linux':
+            def resolve_strict_host_check(strict_host_check):
+                strict_host_check = True if strict_host_check is None else strict_host_check
+
+                # Expand the path to known_hosts so that the devlib objects can
+                # be reused unchanged in another user namespace where the home
+                # directory would not expand to the expected folder.
+                if strict_host_check and isinstance(strict_host_check, bool):
+                    strict_host_check = str(Path('~/.ssh/known_hosts').expanduser().resolve())
+
+                return strict_host_check
+
             devlib_target_cls = devlib.LinuxTarget
             conn_settings.update(
                 username=resolved_username,
                 port=port or cls.SSH_PORT_DEFAULT,
                 host=host,
-                strict_host_check=True if strict_host_check is None else strict_host_check,
+                strict_host_check=resolve_strict_host_check(strict_host_check),
                 use_scp=False if use_scp is None else use_scp,
             )
 
--
GitLab

From 4a6d5ec6fb0e7ab800bcee5d46409103930cff65 Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Mon, 15 Jul 2024 11:22:04 +0100
Subject: [PATCH 2/4] lisa.trace: Avoid error log when dataframe cannot be serialized to cache

FIX

In some cases, it is expected that the dataframe cannot be serialized
to the cache, e.g. if it contains an object column that cannot be
represented in arrow.
---
 lisa/trace.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lisa/trace.py b/lisa/trace.py
index 5f0f42a5f..d1d91c315 100644
--- a/lisa/trace.py
+++ b/lisa/trace.py
@@ -4485,7 +4485,10 @@ class _TraceCache(Loggable):
             self._write_data(cache_desc.fmt, data, data_path)
         except Exception as e:
             if best_effort:
-                log_error(e)
+                # Do not log the error, as it could be an expected one
+                # (e.g. we have an object column in a dataframe that cannot
+                # be converted to arrow).
+                pass
             else:
                 raise e
         else:
@@ -4499,7 +4502,7 @@ class _TraceCache(Loggable):
             # probably because the descriptor includes something that
             # cannot be serialized to JSON.
             except _CannotWriteSwapEntry as e:
-                self.logger.debug(f'Could not write {cache_desc} to swap: {e}')
+                log_error(e)
                 swap_entry.written = False
                 return
         else:
--
GitLab
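A note on PATCH 2 above: the "expected" failure it silences is typically an
arrow conversion error. Below is a minimal standalone sketch of that
situation; it is not LISA code, and the Opaque class is a made-up stand-in
for any value that has no arrow representation:

    import pandas as pd
    import pyarrow as pa

    class Opaque:
        # Stand-in for any Python object that arrow cannot represent.
        pass

    df = pd.DataFrame({'Time': [0.1, 0.2], 'value': [Opaque(), Opaque()]})

    try:
        # The same kind of conversion a parquet-based cache has to perform.
        pa.Table.from_pandas(df)
    except (pa.ArrowInvalid, pa.ArrowTypeError) as e:
        # Expected failure: a best_effort cache write should skip the entry
        # silently instead of logging an error.
        print(f'cannot serialize to arrow: {e}')

Since such dataframes can legitimately reach the cache, the best_effort path
now swallows the exception rather than treating it as an error worth
reporting.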
From 73ce4330d5dcd77fbc229753e12d86ce5ac2b87e Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Mon, 15 Jul 2024 12:15:54 +0100
Subject: [PATCH 3/4] lisa.trace: Fix caching for Trace(normalize_time=...) views

FIX

Fix dataframe caching for trace views created with normalize_time=True.
---
 lisa/trace.py | 66 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 53 insertions(+), 13 deletions(-)

diff --git a/lisa/trace.py b/lisa/trace.py
index d1d91c315..3bfdbe340 100644
--- a/lisa/trace.py
+++ b/lisa/trace.py
@@ -360,6 +360,21 @@ def _lazyframe_rewrite(df, update_plan):
     return df
 
 
+class _CacheDataDescEncodable(abc.ABC):
+    """
+    Inheriting from this class allows encoding a value in JSON for a cache
+    desc.
+    """
+
+    @abc.abstractmethod
+    def json_encode(self):
+        """
+        Returns a more basic object that can readily be encoded by an
+        unmodified json serializer.
+        """
+        pass
+
+
 CPU = newtype(int, 'CPU', doc='Alias to ``int`` used for CPU IDs')
 
 
@@ -3399,11 +3414,12 @@ class _ProcessTraceView(_TraceViewBase):
     @property
     def trace_state(self):
         f = self._process_df
+
         return (
             super().trace_state,
             # This likely will be a value that cannot be serialized to JSON if
             # it was user-provided. This will prevent caching as it should.
-            None if f is None else f,
+            f,
         )
 
     def _internal_df_event(self, event, **kwargs):
@@ -3586,6 +3602,22 @@ class _NamespaceTraceView(_TraceViewBase):
         return super()._internal_df_event(event, **kwargs)
 
 
+class _TimeOffsetter(_CacheDataDescEncodable):
+    def __init__(self, offset):
+        assert isinstance(offset, Timestamp)
+        offset_ns = offset.as_nanoseconds
+        self._offset_ns = offset_ns
+        self._offset_polars = _polars_duration_expr(offset_ns, unit='ns', rounding='down')
+
+    def json_encode(self):
+        return self._offset_ns
+
+    def __call__(self, event, df):
+        return df.with_columns(
+            pl.col('Time') - self._offset_polars
+        )
+
+
 class _NormalizedTimeTraceView(_TraceViewBase):
     def __init__(self, trace, window, **kwargs):
         window = window or (trace.start, None)
@@ -3611,14 +3643,10 @@ class _NormalizedTimeTraceView(_TraceViewBase):
     def _with_time_offset(cls, trace, start):
         # Round down to avoid ending up with negative Time for anything that
         # does not actually happen before the start
-        offset = _polars_duration_expr(start, rounding='down')
-        def time_offset(event, df):
-            return df.with_columns(
-                pl.col('Time') - offset
-            )
+        start = Timestamp(start, rounding='down')
 
         return trace.get_view(
-            process_df=time_offset
+            process_df=_TimeOffsetter(start)
         )
 
     @property
@@ -3929,10 +3957,23 @@ class _CacheDataSwapEntry:
         Return a mapping suitable for JSON serialization.
         """
         desc = self.cache_desc_nf.to_json_map()
+
+        class Encoder(json.JSONEncoder):
+            def default(self, o):
+                if isinstance(o, _CacheDataDescEncodable):
+                    cls = o.__class__
+                    return {
+                        'module': cls.__module__,
+                        'cls': cls.__qualname__,
+                        'value': o.json_encode(),
+                    }
+                else:
+                    return super().default(o)
+
         try:
             # Use json.dumps() here to fail early if the descriptor cannot be
             # dumped to JSON
-            desc = json.dumps(desc)
+            desc = Encoder().encode(desc)
         except TypeError as e:
             raise _CannotWriteSwapEntry(e)
 
@@ -4477,9 +4518,6 @@ class _TraceCache(Loggable):
         if self._estimate_data_swap_size(data) + self._swap_size > self.max_swap_size:
             self.scrub_swap()
 
-        def log_error(e):
-            self.logger.error(f'Could not write {cache_desc} to swap: {e}')
-
         # Write the Parquet file and update the write speed
         try:
             self._write_data(cache_desc.fmt, data, data_path)
@@ -4500,9 +4538,11 @@ class _TraceCache(Loggable):
             )
             # We have a swap entry that cannot be written to the swap,
             # probably because the descriptor includes something that
-            # cannot be serialized to JSON.
+            # cannot be serialized to JSON. This may happen under
+            # normal operations, e.g. with a user-defined process_df
+            # function passed to _ProcessTraceView.
             except _CannotWriteSwapEntry as e:
-                log_error(e)
+                self.logger.debug(f'Could not write {cache_desc} to swap: {e}')
                 swap_entry.written = False
                 return
         else:
--
GitLab
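A note on the caching fix in PATCH 3 above: the key mechanism is a
json.JSONEncoder subclass whose default() hook recognizes objects that opt
in via json_encode(). Here is a self-contained sketch of the same pattern,
with shortened names standing in for _CacheDataDescEncodable and
_TimeOffsetter (an illustration, not the actual lisa.trace code):

    import abc
    import json

    class Encodable(abc.ABC):
        # Plays the role of _CacheDataDescEncodable.
        @abc.abstractmethod
        def json_encode(self):
            pass

    class Offsetter(Encodable):
        # Plays the role of _TimeOffsetter: fully identified by its offset.
        def __init__(self, offset_ns):
            self._offset_ns = offset_ns

        def json_encode(self):
            return self._offset_ns

    class Encoder(json.JSONEncoder):
        def default(self, o):
            # Called only for objects json does not know how to encode.
            if isinstance(o, Encodable):
                cls = o.__class__
                return {
                    'module': cls.__module__,
                    'cls': cls.__qualname__,
                    'value': o.json_encode(),
                }
            return super().default(o)

    # A descriptor holding an Offsetter now has a stable JSON form, so two
    # normalized-time views with the same offset map to the same cache entry:
    print(Encoder().encode({'process_df': Offsetter(1_000_000)}))

    # An arbitrary user-provided callable still raises TypeError, which the
    # cache maps to _CannotWriteSwapEntry and a silent skip:
    try:
        Encoder().encode({'process_df': lambda event, df: df})
    except TypeError:
        print('not cacheable, as intended')

This is why replacing the old time_offset closure with _TimeOffsetter fixes
caching for normalize_time views: a closure has no stable JSON encoding, so
every such view previously fell in the uncacheable bucket.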
From 2a4f67e0aa007e8075a5e697ed4ddf7e798afdb7 Mon Sep 17 00:00:00 2001
From: Douglas Raillard
Date: Mon, 15 Jul 2024 13:59:55 +0100
Subject: [PATCH 4/4] doc/conf.py: Workaround ipywidget intersphinx inventory issue

FIX

Workaround for:
https://github.com/jupyter-widgets/ipywidgets/issues/3930
---
 doc/conf.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/conf.py b/doc/conf.py
index 8fd481d41..a8d81f2ea 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -478,6 +478,13 @@ ignored_refs.update(
 ignored_refs = set(map(re.compile, ignored_refs))
 
 
+# Workaround for: https://github.com/jupyter-widgets/ipywidgets/issues/3930
+# Suggested: https://github.com/sphinx-doc/sphinx/issues/12585#issuecomment-2228420035
+suppress_warnings = [
+    'intersphinx.external',
+]
+
+
 class CustomPythonDomain(PythonDomain):
     def find_obj(self, env, modname, classname, name, type, searchmode=0):
         refs = super().find_obj(env, modname, classname, name, type, searchmode)
--
GitLab
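A note on PATCH 4 above: suppress_warnings is a standard Sphinx
configuration value that filters diagnostics by 'type.subtype' string. A
sketch of how the pieces fit together in a conf.py; the intersphinx_mapping
entry is hypothetical and the actual LISA conf.py may declare it
differently:

    # conf.py (illustrative excerpt)
    extensions = [
        'sphinx.ext.intersphinx',
    ]

    intersphinx_mapping = {
        # Hypothetical entry: the ipywidgets inventory is the one whose
        # contents trigger the warnings tracked in the issue linked above.
        'ipywidgets': ('https://ipywidgets.readthedocs.io/en/stable/', None),
    }

    # Silence the whole 'intersphinx.external' category of warnings; the
    # filter matches warning categories, not individual projects.
    suppress_warnings = [
        'intersphinx.external',
    ]

The trade-off is that genuinely broken external references are silenced as
well, which is why the commit frames this as a workaround until the
upstream inventory is fixed.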