From 2df531c16083fbca748bc9f5d46432b1746d0fd9 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Mon, 9 Jun 2025 12:34:21 +0100 Subject: [PATCH 1/4] Delete unreachable code Change-Id: I76079a1c16cd29474908059ed057da559e4ad901 --- coverage-tool/coverage-reporting/intermediate_layer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 55474b9..60c8b3c 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -1,6 +1,6 @@ # !/usr/bin/env python ############################################################################### -# Copyright (c) 2020-2023, ARM Limited and Contributors. All rights reserved. +# Copyright (c) 2020-2025, ARM Limited and Contributors. All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause ############################################################################### @@ -218,10 +218,6 @@ def get_function_line_numbers(source_file: str) -> Dict[str, int]: except BaseException: logger.warning("Warning: Can't get all function line numbers from %s" % source_file) - except Exception as ex: - logger.warning(f"Warning: Unknown error '{ex}' when executing command " - f"'{command}'") - return {} return fln -- GitLab From 2cd8ff5bc9a191f38bd9fa85a4ff17aa87e16faf Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Mon, 9 Jun 2025 13:14:28 +0100 Subject: [PATCH 2/4] Optimize _process_fn_no_sources - find is faster than the -r option for grep, and its results can be cached. - It's faster to scan a file by hand and lookup all the function names at once than it is to invoke grep on it once per function name. Change-Id: I2888c60f019e218667b760f015bff86a09f11843 --- .../coverage-reporting/intermediate_layer.py | 44 ++++++++++++++++--- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 60c8b3c..edb30a8 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -29,6 +29,8 @@ from typing import Generator from typing import Union from typing import Tuple import logging +import sys +from collections import defaultdict __version__ = "7.0" @@ -560,6 +562,24 @@ class CoverageHandler(object): return self._source_files +IDENTIFIER = re.compile(r'\b[A-Za-z_]\w*\b') + + +def identifiers(filename): + with open(filename, errors='replace') as f: + return IDENTIFIER.findall(f.read()) + + +def locate_identifiers(ids, filenames): + """Map each identifier to the set of files in which it appears.""" + result = defaultdict(set) + for filename in filenames: + for s in identifiers(filename): + if s in ids: + result[s].add(filename) + return result + + class IntermediateCodeCoverage(object): """Class used to process the trace data along with the dwarf signature files to produce an intermediate layer in json with @@ -591,6 +611,7 @@ class IntermediateCodeCoverage(object): self.elf_map = {} # For elf custom mappings self.elf_custom = None + self.found_source_files = None def process(self): """ @@ -683,9 +704,21 @@ class IntermediateCodeCoverage(object): logger.info(f"Total functions: {total_number_functions}, Functions " f"covered:{functions_covered}") # Now check code coverage in the functions with no dwarf signature - self._process_fn_no_sources(parser) + if parser.no_source_functions: + 
self._process_fn_no_sources(parser) return parser + def find_source_files(self): + # Cache the list of .c, .s and .S files found in the local workspace. + if self.found_source_files is None: + self.found_source_files = [sys.intern(f) for f in subprocess.check_output([ + 'find', self.local_workspace, + '(', '-name', '*.c', '-o', '-name', '*.s', '-o', '-name', '*.S', ')', + '-type', 'f'], + universal_newlines=True, + stderr=subprocess.DEVNULL).splitlines()] + return self.found_source_files + def _process_fn_no_sources(self, parser: BinaryParser): """ Checks function coverage for functions with no dwarf signature i.e. @@ -701,17 +734,14 @@ class IntermediateCodeCoverage(object): traces_address_pointer = 0 _functions = parser.no_source_functions functions_addresses = sorted(_functions.keys()) + function_names = [v['name'] for v in _functions.values()] + hits = locate_identifiers(function_names, self.find_source_files()) address_size = 4 for start_address in functions_addresses: function_covered = False function_name = _functions[start_address]['name'] # Get all files in the source code where the function is defined - source_files = os_command("grep --include '*.c' --include '*.s' " - "--include '*.S' -nrw '{}' {}" - "| cut -d: -f1". - format(function_name, - self.local_workspace)) - unique_files = set(source_files.split()) + unique_files = hits[function_name] sources_found = [] for source_file in unique_files: line_number = parser.function_line_numbers.get_line_number( -- GitLab From d4141967f1fa1e46f535d36c063e0849c71b146a Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Wed, 11 Jun 2025 09:33:07 +0100 Subject: [PATCH 3/4] Optimize ctags calls It's expensive to call ctags once per source file identified in the DWARF data. This patch: - Calls ctags on a bunch of source files at a time. The '-L -' argument avoids the risk of a huge command line hitting OS limits. Similarly, we read a line at a time from the stdout pipe to avoid holding a huge amount of output at once in memory. - Recovers the file paths from the lines output by ctags. (It always prints them in '-x' mode, even for a single input file). This exposes us to spurious differences in pathnames from ctags and objdump, (e.g. redundant '/./'), so we fix them up with os.path.relpath(). (This inaccuracy happens elsewhere in intermediate_layer.py, and will be addressed in a future patch). - Similarly, changes FunctionLineNumbers to handle a bunch of functions at once. Even with the cache of ctags results, repeated calls to get_line_number() can be expensive. For example, simply checking that the files are already cached takes linear time each call and can dominate the run time for some workloads. This patch reduces the number of calls to two - one for functions with source information in the DWARF, one for functions without. - Renames get_line_number() to get_definition_map() to reflect these changes. - Renames 'workspace' to 'local_workspace' in FunctionLineNumbers for consistency with the rest of the program. This is cosmetic, and has nothing to do with the rest of the patch. LIMITATIONS: - Parsing the file name from ctags output will break for a path with embedded whitespace. This risk is present in the original (because of shell=True), so it is assumed to be acceptable here. Consider ctags json output if it becomes a problem. - The error message for ctags failure appears to lose specificity because it no longer outputs each file name. The likely intent was to report problems with particular missing, corrupt or unreadable files. 
In practice, Exuberant ctags returns success in all these cases, even for a single file on the command line. The only way I found to provoke a nonzero exit code was to pass a bad command line argument. The point is that ctags will either fail for every file or none of them, so testing each exit code is not effective. If file-specific errors need to be reported, consider capturing and logging lines from the stderr pipe. Change-Id: I89c80a8ac181d6bc01351c0f4922941829a1ed5c --- .../coverage-reporting/intermediate_layer.py | 150 +++++++++++------- 1 file changed, 95 insertions(+), 55 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index edb30a8..b14ba96 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -28,6 +28,8 @@ from typing import List from typing import Generator from typing import Union from typing import Tuple +from typing import Iterable +from typing import Set import logging import sys from collections import defaultdict @@ -195,57 +197,88 @@ def remove_workspace(path, workspace): return ret -def get_function_line_numbers(source_file: str) -> Dict[str, int]: +def get_function_line_numbers(source_file_list: Iterable[str]) -> Dict[str, Dict[str, int]]: """ Using ctags get all the function names with their line numbers - within the source_file + within each source file. - :return: Dictionary with function name as key and line number as value + :return: Dictionary with source file name as key. Value is a dictionary with + function name as key and line number as value. """ - command = "ctags -x --c-kinds=f {}".format(source_file) - fln = {} + fln = defaultdict(dict) try: - function_lines = os_command(command).split("\n") - for line in function_lines: + proc = subprocess.Popen( + ['ctags', '--c-kinds=f', '-x', '-L', '-'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + universal_newlines=True # aka 'text' keyword parameter + ) + + proc.stdin.write('\n'.join(source_file_list) + '\n') + proc.stdin.close() + + for line in proc.stdout: cols = line.split() - if len(cols) < 3: + if len(cols) < 4: continue + line_no = int(cols[2]) + # Flatten a path reported as '/foo/./bar' or '/foo/../foo/bar' + # to '/foo/bar'. + source_file = os.path.normpath(cols[3]) if cols[1] == "function": - fln[cols[0]] = int(cols[2]) + fln[source_file][cols[0]] = line_no elif cols[1] == "label": if cols[0] == "func": - fln[cols[-1]] = int(cols[2]) + fln[source_file][cols[-1]] = line_no elif cols[0] + ":" == cols[-1]: - fln[cols[0]] = int(cols[2]) + fln[source_file][cols[0]] = line_no + + if proc.wait() != 0: + raise Exception() + except BaseException: - logger.warning("Warning: Can't get all function line numbers from %s" % - source_file) + logger.warning("Warning: Can't get all function line numbers") return fln class FunctionLineNumbers(object): - """Helper class used to get a function start line number within - a source code file""" + """Helper class used to get all the source code files and start line + numbers for a function""" - def __init__(self, workspace: str): + def __init__(self, local_workspace: str): """ - Initialise dictionary to allocate source code files with the - corresponding function start line numbers. + Initialise a cache of results from ctags. Key is a source file path. + Value is a dictionary with function name as key and starting line number + as value. 
- :param workspace: The folder where the source files are deployed + :param local_workspace: The folder where the source files are deployed """ - self.filenames = {} - self.workspace = workspace + self.locations_by_source_file = {} + self.local_workspace = local_workspace - def get_line_number(self, filename: str, function_name: str) -> int: + def get_definition_map(self, filenames: Iterable[str], function_names: Set[str]) -> Dict[str, Dict[str, int]]: + """ + Find the definitions for each function name by calling ctags on the given + filenames. Return a dictionary with function name as key. Value is a + dictionary with file name as key and line number as value. If no definitions + are found for a function, it will be omitted from the results. + + filenames are expected to be absolute paths in the local workspace. + """ if not FUNCTION_LINES_ENABLED: - return 0 - if filename not in self.filenames: - source_file = os.path.join(self.workspace, filename) - # Get all functions with their lines in the source file - self.filenames[filename] = get_function_line_numbers(source_file) - return 0 if function_name not in self.filenames[filename] else \ - self.filenames[filename][function_name] + return {} + + missing = set(filenames) - self.locations_by_source_file.keys() + if missing: + self.locations_by_source_file.update(get_function_line_numbers(missing)) + + result = defaultdict(dict) + for filename, definitions in self.locations_by_source_file.items(): + for func, line in definitions.items(): + if func in function_names: + result[func][filename] = definitions[func] + return result class BinaryParser(object): @@ -472,11 +505,16 @@ class BinaryParser(object): source_code_block.source_file, self.workspace) yield source_code_block - def get_function_block(self) -> Generator['BinaryParser.FunctionBlock', - None, None]: - """Generator method to obtain all the function blocks contained in + def _local_path(self, filename): + return os.path.normpath(os.path.join(self.local_workspace, filename) if self.remove_workspace else filename) + + def get_function_blocks(self) -> List['BinaryParser.FunctionBlock']: + """Get list of all the function blocks contained in the binary dump file. 
""" + result = [] + filenames = set() + function_names = set() for function_block in BinaryParser.FunctionBlock.get(self.dump): if function_block.source_file is None: logger.warning(f"Source file not found for function " @@ -485,10 +523,20 @@ class BinaryParser(object): if self.remove_workspace: function_block.source_file = remove_workspace( function_block.source_file, self.workspace) - function_block.function_line_number = \ - self.function_line_numbers.get_line_number( - function_block.source_file, function_block.name) - yield function_block + + filenames.add(function_block.source_file) + function_names.add(function_block.name) + result.append(function_block) + + definition_map = self.function_line_numbers.get_definition_map( + (self._local_path(f) for f in filenames), + function_names) + + for function_block in result: + function_block.function_line_number = definition_map.get(function_block.name, {}).get( + self._local_path(function_block.source_file), 0) + + return result class CoverageHandler(object): @@ -570,14 +618,10 @@ def identifiers(filename): return IDENTIFIER.findall(f.read()) -def locate_identifiers(ids, filenames): - """Map each identifier to the set of files in which it appears.""" - result = defaultdict(set) - for filename in filenames: - for s in identifiers(filename): - if s in ids: - result[s].add(filename) - return result +def locate_definition_candidates(ids: Set[str], filenames: Iterable[str]) -> Set[str]: + """Get the files which mention any of the ids.""" + return {f for f in filenames + if any(s in ids for s in identifiers(f))} class IntermediateCodeCoverage(object): @@ -674,7 +718,7 @@ class IntermediateCodeCoverage(object): self.local_workspace) total_number_functions = 0 functions_covered = 0 - for function_block in parser.get_function_block(): + for function_block in parser.get_function_blocks(): total_number_functions += 1 # Function contains source code self.coverage.add_function_coverage(function_block) @@ -734,20 +778,16 @@ class IntermediateCodeCoverage(object): traces_address_pointer = 0 _functions = parser.no_source_functions functions_addresses = sorted(_functions.keys()) - function_names = [v['name'] for v in _functions.values()] - hits = locate_identifiers(function_names, self.find_source_files()) + function_names = {v['name'] for v in _functions.values()} + candidates = locate_definition_candidates( + function_names, self.find_source_files()) + definition_map = parser.function_line_numbers.get_definition_map( + candidates, function_names) address_size = 4 for start_address in functions_addresses: function_covered = False function_name = _functions[start_address]['name'] - # Get all files in the source code where the function is defined - unique_files = hits[function_name] - sources_found = [] - for source_file in unique_files: - line_number = parser.function_line_numbers.get_line_number( - source_file, function_name) - if line_number > 0: - sources_found.append((source_file, line_number)) + sources_found = list(definition_map.get(function_name, {}).items()) if len(sources_found) == 0: logger.debug(f"'{function_name}' not found in sources") elif len(sources_found) > 1: -- GitLab From c8d81638c782c4006c912ec1e888f986d5344139 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Thu, 26 Jun 2025 17:16:39 +0100 Subject: [PATCH 4/4] Always normalize paths obtained from objdump This fixes a defect when objdump reports different forms of the same canonical path, e.g. '/foo/bar' and '/foo/./bar'. 
It's possible for the same line to be reported twice, once covered, once not. Change-Id: I448a64f69d2725b43e6f24de3590e383e58c9b94 --- .../coverage-reporting/intermediate_layer.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index b14ba96..cec6f11 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -500,14 +500,18 @@ class BinaryParser(object): """ for source_code_block in BinaryParser.SourceCodeBlock.get( function_block.dwarf): - if self.remove_workspace: - source_code_block.source_file = remove_workspace( - source_code_block.source_file, self.workspace) + source_code_block.source_file = self._relativized_path( + source_code_block.source_file) yield source_code_block def _local_path(self, filename): return os.path.normpath(os.path.join(self.local_workspace, filename) if self.remove_workspace else filename) + def _relativized_path(self, filename): + if self.remove_workspace: + filename = remove_workspace(filename, self.workspace) + return os.path.normpath(filename) + def get_function_blocks(self) -> List['BinaryParser.FunctionBlock']: """Get list of all the function blocks contained in the binary dump file. @@ -520,9 +524,8 @@ class BinaryParser(object): logger.warning(f"Source file not found for function " f"{function_block.name}, will not be covered") continue - if self.remove_workspace: - function_block.source_file = remove_workspace( - function_block.source_file, self.workspace) + function_block.source_file = self._relativized_path( + function_block.source_file) filenames.add(function_block.source_file) function_names.add(function_block.name) -- GitLab
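As a brief illustration of what the final patch relies on: os.path.normpath() collapses the alternative spellings of a path that objdump and ctags can emit, which is why keying coverage data on the normalized form removes the duplicate entries. A minimal, self-contained sketch follows; the paths are made-up examples, not taken from the tool or the patches above.

    import os

    # Alternative spellings of the same canonical path, as described in the
    # commit messages above (redundant '/./' and 'x/../x' components).
    paths = ["/foo/bar/baz.c", "/foo/./bar/baz.c", "/foo/../foo/bar/baz.c"]

    # normpath() folds the redundant components purely textually (it never
    # touches the filesystem), so all three spellings compare equal and end
    # up keyed under the same source file in the coverage data.
    normalized = {os.path.normpath(p) for p in paths}
    assert normalized == {"/foo/bar/baz.c"}
    print(normalized)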