From 262e51d8858c0746f419b915361e6cbeef13a65c Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 15 Jul 2025 14:00:34 +0100 Subject: [PATCH] lisa._doc.helpers: Improve AI API index FIX Do not include publicly inherited methods in the index to avoid repeating the same information lots of times. That drastically reduces the size of the index and avoids ambiguous matches. --- doc/conf.py | 9 +++++++-- lisa/_doc/helpers.py | 44 ++++++++++++++++++++++++++++++-------------- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 7537e3882..4cb654d90 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -56,7 +56,7 @@ from lisa._doc.helpers import ( DocPlotConf, autodoc_pre_make_plots, intersphinx_warn_missing_reference_handler, autodoc_ai_desc_process, autodoc_ai_desc_merge, - autodoc_ai_desc_build_finished + make_autodoc_ai_desc_build_finished ) from lisa.analysis.base import TraceAnalysisBase @@ -753,7 +753,12 @@ def setup(app): app.connect('autodoc-process-bases', autodoc_process_bases_handler) app.connect('env-merge-info', autodoc_ai_desc_merge) - app.connect('build-finished', autodoc_ai_desc_build_finished) + app.connect('build-finished', make_autodoc_ai_desc_build_finished( + path='api_ai_descs.json', + # We don't want to muddy search results with irrelevant data, so we + # restrict it to the lisa Python package. 
+ filter_=lambda data: data['name'].startswith('lisa.') + )) app.connect('autodoc-skip-member', _autodoc_skip_member_handler) app.connect('lisa-exec-state', lambda app: ExecState(plots=plots)) diff --git a/lisa/_doc/helpers.py b/lisa/_doc/helpers.py index 7d880a1a9..1afafec78 100644 --- a/lisa/_doc/helpers.py +++ b/lisa/_doc/helpers.py @@ -1835,8 +1835,21 @@ def autodoc_ai_desc_process(app, what, name, obj, options, lines): pass else: if not docobj.autodoc_is_skipped(app): - descs = _get_ai_descs_app(app) - descs.add(docobj.ai_desc) + # If a method is inherited publicly, we don't want to add its + # descriptor for all the places where it is inherited, otherwise we + # will end up with the same docstring multiple times, which will + # confuse the LLM. It is also guaranteed (?) that the method will + # be documented at the point where it is defined, so it will appear + # in the descriptors list. + # + # However, a privately-inherited method is still included, as it + # would otherwise not appear anywhere. 
+ inherited, place, visibility = docobj.resolve_inheritance_style(app) + if inherited and visibility == 'public': + pass + else: + descs = _get_ai_descs_app(app) + descs.add(docobj.ai_desc) def autodoc_ai_desc_merge(app, env, docnames, other): @@ -1845,21 +1858,24 @@ def autodoc_ai_desc_merge(app, env, docnames, other): main_descs.update(other_descs) -def autodoc_ai_desc_build_finished(app, exception): - descs = _get_ai_descs_app(app) +def make_autodoc_ai_desc_build_finished(path, filter_): + def autodoc_ai_desc_build_finished(app, exception): + descs = _get_ai_descs_app(app) - descs = [ - desc.data - for desc in descs - ] - descs = sorted(descs, key=itemgetter('name')) + descs = [ + desc.data + for desc in descs + if filter_(desc.data) + ] + descs = sorted(descs, key=itemgetter('name')) - root = Path(app.outdir) - path = root / 'api_ai_descs.json' - with open(path, 'w') as f: - json.dump(descs, f) + root = Path(app.outdir) + _path = root / path + with open(_path, 'w') as f: + json.dump(descs, f) - getLogger('ai-desc').info(f'Wrote {len(descs)} descriptors to: {path}') + getLogger('ai-desc').info(f'Wrote {len(descs)} descriptors to: {_path}') + return autodoc_ai_desc_build_finished def intersphinx_warn_missing_reference_handler(app, domain, node, non_ignored_refs): -- GitLab