#! /usr/bin/env python3

# SPDX-License-Identifier: Apache-2.0
#
# Copyright (C) 2025, Arm Limited and contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import sys
import re
import logging
import argparse
from collections.abc import Mapping

import polars as pl

from lisa.trace import Trace


class TraceFrames(Mapping):
    """
    Read-only mapping of event names to the dataframe of that event.

    :param trace: Trace object to get the dataframes from. It needs to provide
        an ``available_events`` attribute and a ``df_event()`` method.

    :param events: Events exposed when iterating over the mapping. If
        ``None``, ``trace.available_events`` is used.

    .. note:: Item lookup is forwarded to ``trace.df_event()`` without
        checking the key against ``events``, so ``events`` only controls what
        iteration and ``len()`` report.
    """
    def __init__(self, trace, events=None):
        self.trace = trace
        # Deduplicate and sort so that iteration order is deterministic.
        self.events = sorted(set(
            trace.available_events
            if events is None else
            events
        ))

    def __getitem__(self, key):
        return self.trace.df_event(key)

    def __iter__(self):
        return iter(self.events)

    def __len__(self):
        return len(self.events)


def _referenced_events(queries, available_events):
    """
    Return the sorted list of events whose name appears in at least one query.

    :param queries: SQL queries to inspect.
    :param available_events: Names of the events that can be referenced.

    An event is considered referenced when its name appears in a query
    delimited on both sides by anything that cannot be part of an event name
    (whitespace, quotes, parenthesis, ``;``, ``.``, start/end of string ...).
    """
    def is_referenced(event):
        # Unlike a plain whitespace split, the lookarounds also match
        # 'FROM my_event;', '"my_event"', '(SELECT ... FROM my_event)' and
        # 'my_event.column', while still rejecting 'my_event_suffix'.
        pattern = re.compile(rf'(?<![\w@]){re.escape(event)}(?![\w@])')
        return any(pattern.search(query) for query in queries)

    return sorted(filter(is_referenced, available_events))


def make_sql_context(trace, queries):
    """
    Build a :class:`polars.SQLContext` exposing one table per event referenced
    by the given queries.

    :param trace: Trace to expose to the SQL queries.
    :param queries: Iterable of SQL query strings that will be executed on the
        returned context.

    :returns: A :class:`polars.SQLContext` in which each referenced event is
        registered as a table named after the event.
    """
    # Allow any iterable, since the queries are scanned once per event.
    queries = list(queries)
    available_events = set(trace.available_events)
    # Hack: since we don't have any easy programmatic way of knowing what
    # tables a SQL query is referring to, we simply look for the names of the
    # available events in the text of the queries. This allows not loading the
    # entire trace.
    needed_events = _referenced_events(queries, available_events)
    trace = trace.get_view(events=needed_events)

    frames = TraceFrames(
        trace,
        events=needed_events,
    )
    return pl.SQLContext(frames)


def sql_main(logger, args):
    """
    Entry point of the ``sql`` subcommand: run each query and print the result.

    :param logger: Logger used to report diagnostics.
    :param args: Parsed command line arguments, providing the ``trace`` and
        ``query`` attributes.
    """
    # args.query is None if the option was never passed (e.g. namespace built
    # by hand), which would otherwise blow up when iterating over it.
    queries = args.query or []
    if not queries:
        logger.warning('No SQL query provided, nothing to run')
        return

    trace = Trace(
        args.trace,
        df_fmt='polars-lazyframe'
    )
    ctx = make_sql_context(trace, queries)

    # Negative values lift the display limits so tables are printed in full.
    with pl.Config(
        tbl_cols=-1,
        tbl_rows=-1,
        tbl_width_chars=-1,
    ):
        for query in queries:
            df = ctx.execute(query)
            print(df.collect())


def main(argv=None):
    """
    Command line entry point.

    :param argv: Command line arguments, excluding the program name. Defaults
        to ``sys.argv[1:]``.

    :returns: The value returned by the subcommand handler, suitable for
        :func:`sys.exit`.
    """
    if argv is None:
        argv = sys.argv[1:]

    logger = logging.getLogger('lisa-trace')

    def add_common_args(parser):
        parser.add_argument(
            'trace',
            help='Trace file to parse. All formats recognized by lisa.trace.Trace() are allowed.'
        )

    parser = argparse.ArgumentParser(description="LISA trace analysis CLI tool.")
    subparsers = parser.add_subparsers(required=True, dest='subcommand', help='subcommand help')
    sql_parser = subparsers.add_parser('sql', help='Run a SQL query on the trace using Polars.')

    add_common_args(sql_parser)
    # Required: without it, "append" leaves the attribute set to None and the
    # user would get a traceback instead of a usage message.
    sql_parser.add_argument('--query', action='append', required=True, help='SQL query to run')

    args = parser.parse_args(argv)
    subcommand = args.subcommand

    if subcommand == 'sql':
        return sql_main(logger, args)
    else:
        parser.error(f'Invalid subcommand: {subcommand}')


if __name__ == '__main__':
    sys.exit(main())