From 2df531c16083fbca748bc9f5d46432b1746d0fd9 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Mon, 9 Jun 2025 12:34:21 +0100 Subject: [PATCH 1/5] Delete unreachable code Change-Id: I76079a1c16cd29474908059ed057da559e4ad901 --- coverage-tool/coverage-reporting/intermediate_layer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 55474b9..60c8b3c 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -1,6 +1,6 @@ # !/usr/bin/env python ############################################################################### -# Copyright (c) 2020-2023, ARM Limited and Contributors. All rights reserved. +# Copyright (c) 2020-2025, ARM Limited and Contributors. All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause ############################################################################### @@ -218,10 +218,6 @@ def get_function_line_numbers(source_file: str) -> Dict[str, int]: except BaseException: logger.warning("Warning: Can't get all function line numbers from %s" % source_file) - except Exception as ex: - logger.warning(f"Warning: Unknown error '{ex}' when executing command " - f"'{command}'") - return {} return fln -- GitLab From f88a6782f0fe30818f7d862c2b69e5f1e33399db Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Mon, 9 Jun 2025 13:14:28 +0100 Subject: [PATCH 2/5] Optimize _process_fn_no_sources - find is faster than the -r option for grep, and its results can be cached. - It's faster to scan a file by hand and lookup all the function names at once than it is to invoke grep on it once per function name. Change-Id: I2888c60f019e218667b760f015bff86a09f11843 --- .../coverage-reporting/intermediate_layer.py | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 60c8b3c..21ba356 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -29,6 +29,8 @@ from typing import Generator from typing import Union from typing import Tuple import logging +import sys +from collections import defaultdict __version__ = "7.0" @@ -560,6 +562,24 @@ class CoverageHandler(object): return self._source_files +IDENTIFIER = re.compile(r'\b[A-Za-z_]\w*\b') + +def identifiers(filename): + with open(filename) as f: + for line in f: + for match in IDENTIFIER.finditer(line): + yield match.group() + +def locate_identifiers(ids, filenames): + """Map each identifier to the set of files in which it appears.""" + result = defaultdict(set) + for filename in filenames: + for s in identifiers(filename): + if s in ids: + result[s].add(filename) + return result + + class IntermediateCodeCoverage(object): """Class used to process the trace data along with the dwarf signature files to produce an intermediate layer in json with @@ -591,6 +611,7 @@ class IntermediateCodeCoverage(object): self.elf_map = {} # For elf custom mappings self.elf_custom = None + self.found_source_files = None def process(self): """ @@ -686,6 +707,15 @@ class IntermediateCodeCoverage(object): self._process_fn_no_sources(parser) return parser + def find_source_files(self): + # Cache the list of .c, .s and .S files found in the local workspace. 
+ if self.found_source_files is None: + self.found_source_files = [sys.intern(f) for f in subprocess.check_output([ + 'find', self.local_workspace, + '(', '-name', '*.c', '-o', '-name', '*.s', '-o', '-name', '*.S', ')', + '-type', 'f', '-print0']).decode('utf-8').rstrip('\0').split('\0')] + return self.found_source_files + def _process_fn_no_sources(self, parser: BinaryParser): """ Checks function coverage for functions with no dwarf signature i.e. @@ -701,17 +731,14 @@ class IntermediateCodeCoverage(object): traces_address_pointer = 0 _functions = parser.no_source_functions functions_addresses = sorted(_functions.keys()) + function_names = [v['name'] for v in _functions.values()] + hits = locate_identifiers(function_names, self.find_source_files()) address_size = 4 for start_address in functions_addresses: function_covered = False function_name = _functions[start_address]['name'] # Get all files in the source code where the function is defined - source_files = os_command("grep --include '*.c' --include '*.s' " - "--include '*.S' -nrw '{}' {}" - "| cut -d: -f1". - format(function_name, - self.local_workspace)) - unique_files = set(source_files.split()) + unique_files = hits[function_name] sources_found = [] for source_file in unique_files: line_number = parser.function_line_numbers.get_line_number( -- GitLab From ca980a7a6b2f23510b3da2446d00368229d45c10 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Tue, 10 Jun 2025 17:20:35 +0100 Subject: [PATCH 3/5] Optimize parsing identifiers from source file - It's faster to scan the whole file at once rather than one line at a time. Change-Id: Ia85eb1bebd59ce1174ba766ed1342ed665e0bdf3 --- coverage-tool/coverage-reporting/intermediate_layer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 21ba356..3756c67 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -566,9 +566,8 @@ IDENTIFIER = re.compile(r'\b[A-Za-z_]\w*\b') def identifiers(filename): with open(filename) as f: - for line in f: - for match in IDENTIFIER.finditer(line): - yield match.group() + for match in IDENTIFIER.finditer(f.read()): + yield match.group() def locate_identifiers(ids, filenames): """Map each identifier to the set of files in which it appears.""" -- GitLab From 0456434442b5e6286d50384fdd5d74bc9fd57089 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Tue, 10 Jun 2025 17:23:59 +0100 Subject: [PATCH 4/5] Simplify and slightly speed up find command line - Fix -print0 ugliness - Send stderr to /dev/null instead of the console - find can be chatty. 
Change-Id: I061d750bc3c9deae393a2f76594beda1d7c516c1 --- coverage-tool/coverage-reporting/intermediate_layer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 3756c67..bac53bc 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -712,7 +712,9 @@ class IntermediateCodeCoverage(object): self.found_source_files = [sys.intern(f) for f in subprocess.check_output([ 'find', self.local_workspace, '(', '-name', '*.c', '-o', '-name', '*.s', '-o', '-name', '*.S', ')', - '-type', 'f', '-print0']).decode('utf-8').rstrip('\0').split('\0')] + '-type', 'f'], + universal_newlines=True, + stderr=subprocess.DEVNULL).splitlines() ] return self.found_source_files def _process_fn_no_sources(self, parser: BinaryParser): -- GitLab From 36a799c55dd6941221f228f387bb6581baae66df Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Wed, 11 Jun 2025 09:33:07 +0100 Subject: [PATCH 5/5] Optimize ctags calls It's expensive to call ctags once per source file identified in the DWARF data. This patch: - Calls ctags on all the source files from a single ELF at once. '-L -' is used to avoid risk of hitting OS limits with a huge command line. - Similarly, reads the output a line at a time to avoid holding a huge amount of ctags output at once in memory. - Recovers the file paths from the lines output by ctags, which it always prints in '-x' mode, even for a single input file. (Note that this will break on paths with embedded spaces. This limitation was present in the original through the use of shell=True, so it is assumed to be acceptable here - consider json output if it becomes a problem). - Renames 'workspace' to 'local_workspace' in FunctionLineNumbers for consistency with the rest of the program. (For programming convenience, the cache of results is changed to key by local file name - i.e. with local_workspace prepended - rather than the caller's file name, but is otherwise unchanged). Note the apparent loss of specificity in the error message for ctags failure, which no longer outputs each file name. This was probably intended to report problems with particular missing, corrupted, or unreadable files. In practice, Exuberant ctags returns success in these cases, even for a single file on the command line. The only way I found to provoke a nonzero exit code was to pass a bad command line argument. The point is that ctags will either fail for every file or none of them, so testing each exit code is not effective. If file-specific errors need to be reported, consider capturing and logging lines of stderr. 
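For reference, the parsing relies on the fixed column order of Exuberant ctags
'-x' cross-reference output, which looks roughly like the lines below (the
names, paths and line numbers here are purely illustrative, not taken from any
real run):

  bl31_main        function     98 bl31/bl31_main.c   void bl31_main(void)
  func             label        34 lib/entry.S        func handle_irq

Tag name, kind, line number and file path are the first four
whitespace-separated columns, which is also why file paths containing spaces
cannot be recovered from this format.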
Change-Id: I89c80a8ac181d6bc01351c0f4922941829a1ed5c --- .../coverage-reporting/intermediate_layer.py | 110 +++++++++++------- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index bac53bc..18507d5 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -28,6 +28,7 @@ from typing import List from typing import Generator from typing import Union from typing import Tuple +from typing import Iterable import logging import sys from collections import defaultdict @@ -195,57 +196,82 @@ def remove_workspace(path, workspace): return ret -def get_function_line_numbers(source_file: str) -> Dict[str, int]: +def get_function_line_numbers(source_file_list: Iterable[str]) -> Dict[str, Dict[str, int]]: """ Using ctags get all the function names with their line numbers - within the source_file + within each source file. - :return: Dictionary with function name as key and line number as value + :return: Dictionary with source file name as key. Value is a dictionary with + function name as key and line number as value. """ - command = "ctags -x --c-kinds=f {}".format(source_file) - fln = {} + fln = defaultdict(dict) try: - function_lines = os_command(command).split("\n") - for line in function_lines: + proc = subprocess.Popen( + ['ctags', '--c-kinds=f', '-x', '-L', '-'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + universal_newlines=True # aka 'text' keyword parameter + ) + + proc.stdin.write('\n'.join(source_file_list) + '\n') + proc.stdin.close() + + for line in proc.stdout: cols = line.split() - if len(cols) < 3: + if len(cols) < 4: continue + line_no = int(cols[2]) + source_file = cols[3] if cols[1] == "function": - fln[cols[0]] = int(cols[2]) + fln[source_file][cols[0]] = line_no elif cols[1] == "label": if cols[0] == "func": - fln[cols[-1]] = int(cols[2]) + fln[source_file][cols[-1]] = line_no elif cols[0] + ":" == cols[-1]: - fln[cols[0]] = int(cols[2]) + fln[source_file][cols[0]] = line_no + + if proc.wait() != 0: + raise Exception() + except BaseException: - logger.warning("Warning: Can't get all function line numbers from %s" % - source_file) + logger.warning("Warning: Can't get all function line numbers") return fln class FunctionLineNumbers(object): - """Helper class used to get a function start line number within - a source code file""" + """Helper class used to get all the source code files and start line + numbers for a function""" - def __init__(self, workspace: str): + def __init__(self, local_workspace: str): """ Initialise dictionary to allocate source code files with the corresponding function start line numbers. 
- :param workspace: The folder where the source files are deployed + :param local_workspace: The folder where the source files are deployed """ - self.filenames = {} - self.workspace = workspace + self.filenames = {} # Keys are file paths in the local workspace, where we run ctags + self.local_workspace = local_workspace - def get_line_number(self, filename: str, function_name: str) -> int: + def get_line_numbers(self, filenames: List[str], function_name: str) -> Dict[str, int]: if not FUNCTION_LINES_ENABLED: - return 0 - if filename not in self.filenames: - source_file = os.path.join(self.workspace, filename) - # Get all functions with their lines in the source file - self.filenames[filename] = get_function_line_numbers(source_file) - return 0 if function_name not in self.filenames[filename] else \ - self.filenames[filename][function_name] + return {} + + # Input filenames are relative to the workspace and need adjusting for + # the local_workspace. + def localize(filename): + return os.path.join(self.local_workspace, filename) + + local_filenames = { localize(f) for f in filenames } + + missing = local_filenames - self.filenames.keys() + if missing: + self.filenames.update(get_function_line_numbers(missing)) + + # A dict with the input file name as key and the line number as value. + return { f : self.filenames[localize(f)][function_name] + for f in filenames + if function_name in self.filenames.get(localize(f), {}) } class BinaryParser(object): @@ -472,11 +498,12 @@ class BinaryParser(object): source_code_block.source_file, self.workspace) yield source_code_block - def get_function_block(self) -> Generator['BinaryParser.FunctionBlock', - None, None]: - """Generator method to obtain all the function blocks contained in + def get_function_blocks(self) -> List['BinaryParser.FunctionBlock']: + """Get list of all the function blocks contained in the binary dump file. 
""" + result = [] + filenames = [] for function_block in BinaryParser.FunctionBlock.get(self.dump): if function_block.source_file is None: logger.warning(f"Source file not found for function " @@ -485,10 +512,14 @@ class BinaryParser(object): if self.remove_workspace: function_block.source_file = remove_workspace( function_block.source_file, self.workspace) - function_block.function_line_number = \ - self.function_line_numbers.get_line_number( - function_block.source_file, function_block.name) - yield function_block + + filenames.append(function_block.source_file) + result.append(function_block) + + for function_block in result: + line_numbers_by_file = self.function_line_numbers.get_line_numbers(filenames, function_block.name) + function_block.function_line_number = line_numbers_by_file.get(function_block.source_file, 0) + return result class CoverageHandler(object): @@ -673,7 +704,7 @@ class IntermediateCodeCoverage(object): self.local_workspace) total_number_functions = 0 functions_covered = 0 - for function_block in parser.get_function_block(): + for function_block in parser.get_function_blocks(): total_number_functions += 1 # Function contains source code self.coverage.add_function_coverage(function_block) @@ -740,12 +771,9 @@ class IntermediateCodeCoverage(object): function_name = _functions[start_address]['name'] # Get all files in the source code where the function is defined unique_files = hits[function_name] - sources_found = [] - for source_file in unique_files: - line_number = parser.function_line_numbers.get_line_number( - source_file, function_name) - if line_number > 0: - sources_found.append((source_file, line_number)) + line_numbers_by_file = parser.function_line_numbers.get_line_numbers( + unique_files, function_name) + sources_found = list(line_numbers_by_file.items()) if len(sources_found) == 0: logger.debug(f"'{function_name}' not found in sources") elif len(sources_found) > 1: -- GitLab