From 2df531c16083fbca748bc9f5d46432b1746d0fd9 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Mon, 9 Jun 2025 12:34:21 +0100 Subject: [PATCH 1/4] Delete unreachable code Change-Id: I76079a1c16cd29474908059ed057da559e4ad901 --- coverage-tool/coverage-reporting/intermediate_layer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 55474b9..60c8b3c 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -1,6 +1,6 @@ # !/usr/bin/env python ############################################################################### -# Copyright (c) 2020-2023, ARM Limited and Contributors. All rights reserved. +# Copyright (c) 2020-2025, ARM Limited and Contributors. All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause ############################################################################### @@ -218,10 +218,6 @@ def get_function_line_numbers(source_file: str) -> Dict[str, int]: except BaseException: logger.warning("Warning: Can't get all function line numbers from %s" % source_file) - except Exception as ex: - logger.warning(f"Warning: Unknown error '{ex}' when executing command " - f"'{command}'") - return {} return fln -- GitLab From 2cd8ff5bc9a191f38bd9fa85a4ff17aa87e16faf Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Mon, 9 Jun 2025 13:14:28 +0100 Subject: [PATCH 2/4] Optimize _process_fn_no_sources - find is faster than the -r option for grep, and its results can be cached. - It's faster to scan a file by hand and lookup all the function names at once than it is to invoke grep on it once per function name. Change-Id: I2888c60f019e218667b760f015bff86a09f11843 --- .../coverage-reporting/intermediate_layer.py | 44 ++++++++++++++++--- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 60c8b3c..edb30a8 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -29,6 +29,8 @@ from typing import Generator from typing import Union from typing import Tuple import logging +import sys +from collections import defaultdict __version__ = "7.0" @@ -560,6 +562,24 @@ class CoverageHandler(object): return self._source_files +IDENTIFIER = re.compile(r'\b[A-Za-z_]\w*\b') + + +def identifiers(filename): + with open(filename, errors='replace') as f: + return IDENTIFIER.findall(f.read()) + + +def locate_identifiers(ids, filenames): + """Map each identifier to the set of files in which it appears.""" + result = defaultdict(set) + for filename in filenames: + for s in identifiers(filename): + if s in ids: + result[s].add(filename) + return result + + class IntermediateCodeCoverage(object): """Class used to process the trace data along with the dwarf signature files to produce an intermediate layer in json with @@ -591,6 +611,7 @@ class IntermediateCodeCoverage(object): self.elf_map = {} # For elf custom mappings self.elf_custom = None + self.found_source_files = None def process(self): """ @@ -683,9 +704,21 @@ class IntermediateCodeCoverage(object): logger.info(f"Total functions: {total_number_functions}, Functions " f"covered:{functions_covered}") # Now check code coverage in the functions with no dwarf signature - self._process_fn_no_sources(parser) + if parser.no_source_functions: + 
self._process_fn_no_sources(parser) return parser + def find_source_files(self): + # Cache the list of .c, .s and .S files found in the local workspace. + if self.found_source_files is None: + self.found_source_files = [sys.intern(f) for f in subprocess.check_output([ + 'find', self.local_workspace, + '(', '-name', '*.c', '-o', '-name', '*.s', '-o', '-name', '*.S', ')', + '-type', 'f'], + universal_newlines=True, + stderr=subprocess.DEVNULL).splitlines()] + return self.found_source_files + def _process_fn_no_sources(self, parser: BinaryParser): """ Checks function coverage for functions with no dwarf signature i.e. @@ -701,17 +734,14 @@ class IntermediateCodeCoverage(object): traces_address_pointer = 0 _functions = parser.no_source_functions functions_addresses = sorted(_functions.keys()) + function_names = [v['name'] for v in _functions.values()] + hits = locate_identifiers(function_names, self.find_source_files()) address_size = 4 for start_address in functions_addresses: function_covered = False function_name = _functions[start_address]['name'] # Get all files in the source code where the function is defined - source_files = os_command("grep --include '*.c' --include '*.s' " - "--include '*.S' -nrw '{}' {}" - "| cut -d: -f1". - format(function_name, - self.local_workspace)) - unique_files = set(source_files.split()) + unique_files = hits[function_name] sources_found = [] for source_file in unique_files: line_number = parser.function_line_numbers.get_line_number( -- GitLab From d4141967f1fa1e46f535d36c063e0849c71b146a Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Wed, 11 Jun 2025 09:33:07 +0100 Subject: [PATCH 3/4] Optimize ctags calls It's expensive to call ctags once per source file identified in the DWARF data. This patch: - Calls ctags on a bunch of source files at a time. The '-L -' argument avoids the risk of a huge command line hitting OS limits. Similarly, we read a line at a time from the stdout pipe to avoid holding a huge amount of output at once in memory. - Recovers the file paths from the lines output by ctags. (It always prints them in '-x' mode, even for a single input file). This exposes us to spurious differences in pathnames from ctags and objdump, (e.g. redundant '/./'), so we fix them up with os.path.relpath(). (This inaccuracy happens elsewhere in intermediate_layer.py, and will be addressed in a future patch). - Similarly, changes FunctionLineNumbers to handle a bunch of functions at once. Even with the cache of ctags results, repeated calls to get_line_number() can be expensive. For example, simply checking that the files are already cached takes linear time each call and can dominate the run time for some workloads. This patch reduces the number of calls to two - one for functions with source information in the DWARF, one for functions without. - Renames get_line_number() to get_definition_map() to reflect these changes. - Renames 'workspace' to 'local_workspace' in FunctionLineNumbers for consistency with the rest of the program. This is cosmetic, and has nothing to do with the rest of the patch. LIMITATIONS: - Parsing the file name from ctags output will break for a path with embedded whitespace. This risk is present in the original (because of shell=True), so it is assumed to be acceptable here. Consider ctags json output if it becomes a problem. - The error message for ctags failure appears to lose specificity because it no longer outputs each file name. The likely intent was to report problems with particular missing, corrupt or unreadable files. 
In practice, Exuberant ctags returns success in all these cases, even for a single file on the command line. The only way I found to provoke a nonzero exit code was to pass a bad command line argument. The point is that ctags will either fail for every file or none of them, so testing each exit code is not effective. If file-specific errors need to be reported, consider capturing and logging lines from the stderr pipe. Change-Id: I89c80a8ac181d6bc01351c0f4922941829a1ed5c --- .../coverage-reporting/intermediate_layer.py | 150 +++++++++++------- 1 file changed, 95 insertions(+), 55 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index edb30a8..b14ba96 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -28,6 +28,8 @@ from typing import List from typing import Generator from typing import Union from typing import Tuple +from typing import Iterable +from typing import Set import logging import sys from collections import defaultdict @@ -195,57 +197,88 @@ def remove_workspace(path, workspace): return ret -def get_function_line_numbers(source_file: str) -> Dict[str, int]: +def get_function_line_numbers(source_file_list: Iterable[str]) -> Dict[str, Dict[str, int]]: """ Using ctags get all the function names with their line numbers - within the source_file + within each source file. - :return: Dictionary with function name as key and line number as value + :return: Dictionary with source file name as key. Value is a dictionary with + function name as key and line number as value. """ - command = "ctags -x --c-kinds=f {}".format(source_file) - fln = {} + fln = defaultdict(dict) try: - function_lines = os_command(command).split("\n") - for line in function_lines: + proc = subprocess.Popen( + ['ctags', '--c-kinds=f', '-x', '-L', '-'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + universal_newlines=True # aka 'text' keyword parameter + ) + + proc.stdin.write('\n'.join(source_file_list) + '\n') + proc.stdin.close() + + for line in proc.stdout: cols = line.split() - if len(cols) < 3: + if len(cols) < 4: continue + line_no = int(cols[2]) + # Flatten a path reported as '/foo/./bar' or '/foo/../foo/bar' + # to '/foo/bar'. + source_file = os.path.normpath(cols[3]) if cols[1] == "function": - fln[cols[0]] = int(cols[2]) + fln[source_file][cols[0]] = line_no elif cols[1] == "label": if cols[0] == "func": - fln[cols[-1]] = int(cols[2]) + fln[source_file][cols[-1]] = line_no elif cols[0] + ":" == cols[-1]: - fln[cols[0]] = int(cols[2]) + fln[source_file][cols[0]] = line_no + + if proc.wait() != 0: + raise Exception() + except BaseException: - logger.warning("Warning: Can't get all function line numbers from %s" % - source_file) + logger.warning("Warning: Can't get all function line numbers") return fln class FunctionLineNumbers(object): - """Helper class used to get a function start line number within - a source code file""" + """Helper class used to get all the source code files and start line + numbers for a function""" - def __init__(self, workspace: str): + def __init__(self, local_workspace: str): """ - Initialise dictionary to allocate source code files with the - corresponding function start line numbers. + Initialise a cache of results from ctags. Key is a source file path. + Value is a dictionary with function name as key and starting line number + as value. 
- :param workspace: The folder where the source files are deployed + :param local_workspace: The folder where the source files are deployed """ - self.filenames = {} - self.workspace = workspace + self.locations_by_source_file = {} + self.local_workspace = local_workspace - def get_line_number(self, filename: str, function_name: str) -> int: + def get_definition_map(self, filenames: Iterable[str], function_names: Set[str]) -> Dict[str, Dict[str, int]]: + """ + Find the definitions for each function name by calling ctags on the given + filenames. Return a dictionary with function name as key. Value is a + dictionary with file name as key and line number as value. If no definitions + are found for a function, it will be omitted from the results. + + filenames are expected to be absolute paths in the local workspace. + """ if not FUNCTION_LINES_ENABLED: - return 0 - if filename not in self.filenames: - source_file = os.path.join(self.workspace, filename) - # Get all functions with their lines in the source file - self.filenames[filename] = get_function_line_numbers(source_file) - return 0 if function_name not in self.filenames[filename] else \ - self.filenames[filename][function_name] + return {} + + missing = set(filenames) - self.locations_by_source_file.keys() + if missing: + self.locations_by_source_file.update(get_function_line_numbers(missing)) + + result = defaultdict(dict) + for filename, definitions in self.locations_by_source_file.items(): + for func, line in definitions.items(): + if func in function_names: + result[func][filename] = definitions[func] + return result class BinaryParser(object): @@ -472,11 +505,16 @@ class BinaryParser(object): source_code_block.source_file, self.workspace) yield source_code_block - def get_function_block(self) -> Generator['BinaryParser.FunctionBlock', - None, None]: - """Generator method to obtain all the function blocks contained in + def _local_path(self, filename): + return os.path.normpath(os.path.join(self.local_workspace, filename) if self.remove_workspace else filename) + + def get_function_blocks(self) -> List['BinaryParser.FunctionBlock']: + """Get list of all the function blocks contained in the binary dump file. 
""" + result = [] + filenames = set() + function_names = set() for function_block in BinaryParser.FunctionBlock.get(self.dump): if function_block.source_file is None: logger.warning(f"Source file not found for function " @@ -485,10 +523,20 @@ class BinaryParser(object): if self.remove_workspace: function_block.source_file = remove_workspace( function_block.source_file, self.workspace) - function_block.function_line_number = \ - self.function_line_numbers.get_line_number( - function_block.source_file, function_block.name) - yield function_block + + filenames.add(function_block.source_file) + function_names.add(function_block.name) + result.append(function_block) + + definition_map = self.function_line_numbers.get_definition_map( + (self._local_path(f) for f in filenames), + function_names) + + for function_block in result: + function_block.function_line_number = definition_map.get(function_block.name, {}).get( + self._local_path(function_block.source_file), 0) + + return result class CoverageHandler(object): @@ -570,14 +618,10 @@ def identifiers(filename): return IDENTIFIER.findall(f.read()) -def locate_identifiers(ids, filenames): - """Map each identifier to the set of files in which it appears.""" - result = defaultdict(set) - for filename in filenames: - for s in identifiers(filename): - if s in ids: - result[s].add(filename) - return result +def locate_definition_candidates(ids: Set[str], filenames: Iterable[str]) -> Set[str]: + """Get the files which mention any of the ids.""" + return {f for f in filenames + if any(s in ids for s in identifiers(f))} class IntermediateCodeCoverage(object): @@ -674,7 +718,7 @@ class IntermediateCodeCoverage(object): self.local_workspace) total_number_functions = 0 functions_covered = 0 - for function_block in parser.get_function_block(): + for function_block in parser.get_function_blocks(): total_number_functions += 1 # Function contains source code self.coverage.add_function_coverage(function_block) @@ -734,20 +778,16 @@ class IntermediateCodeCoverage(object): traces_address_pointer = 0 _functions = parser.no_source_functions functions_addresses = sorted(_functions.keys()) - function_names = [v['name'] for v in _functions.values()] - hits = locate_identifiers(function_names, self.find_source_files()) + function_names = {v['name'] for v in _functions.values()} + candidates = locate_definition_candidates( + function_names, self.find_source_files()) + definition_map = parser.function_line_numbers.get_definition_map( + candidates, function_names) address_size = 4 for start_address in functions_addresses: function_covered = False function_name = _functions[start_address]['name'] - # Get all files in the source code where the function is defined - unique_files = hits[function_name] - sources_found = [] - for source_file in unique_files: - line_number = parser.function_line_numbers.get_line_number( - source_file, function_name) - if line_number > 0: - sources_found.append((source_file, line_number)) + sources_found = list(definition_map.get(function_name, {}).items()) if len(sources_found) == 0: logger.debug(f"'{function_name}' not found in sources") elif len(sources_found) > 1: -- GitLab From c8d81638c782c4006c912ec1e888f986d5344139 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Thu, 26 Jun 2025 17:16:39 +0100 Subject: [PATCH 4/4] Always normalize paths obtained from objdump This fixes a defect when objdump reports different forms of the same canonical path, e.g. '/foo/bar' and '/foo/./bar'. 
It's possible for the same line to be reported twice, once covered, once not. Change-Id: I448a64f69d2725b43e6f24de3590e383e58c9b94 --- .../coverage-reporting/intermediate_layer.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index b14ba96..cec6f11 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -500,14 +500,18 @@ class BinaryParser(object): """ for source_code_block in BinaryParser.SourceCodeBlock.get( function_block.dwarf): - if self.remove_workspace: - source_code_block.source_file = remove_workspace( - source_code_block.source_file, self.workspace) + source_code_block.source_file = self._relativized_path( + source_code_block.source_file) yield source_code_block def _local_path(self, filename): return os.path.normpath(os.path.join(self.local_workspace, filename) if self.remove_workspace else filename) + def _relativized_path(self, filename): + if self.remove_workspace: + filename = remove_workspace(filename, self.workspace) + return os.path.normpath(filename) + def get_function_blocks(self) -> List['BinaryParser.FunctionBlock']: """Get list of all the function blocks contained in the binary dump file. @@ -520,9 +524,8 @@ class BinaryParser(object): logger.warning(f"Source file not found for function " f"{function_block.name}, will not be covered") continue - if self.remove_workspace: - function_block.source_file = remove_workspace( - function_block.source_file, self.workspace) + function_block.source_file = self._relativized_path( + function_block.source_file) filenames.add(function_block.source_file) function_names.add(function_block.name) -- GitLab
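As a brief illustration of what the final patch relies on: os.path.normpath() collapses the alternative spellings of a path that objdump and ctags can emit, which is why keying coverage data on the normalized form removes the duplicate entries. A minimal, self-contained sketch follows; the paths are made-up examples, not taken from the tool or the patches above.

    import os

    # Alternative spellings of the same canonical path, as described in the
    # commit messages above (redundant '/./' and 'x/../x' components).
    paths = ["/foo/bar/baz.c", "/foo/./bar/baz.c", "/foo/../foo/bar/baz.c"]

    # normpath() folds the redundant components purely textually (it never
    # touches the filesystem), so all three spellings compare equal and end
    # up keyed under the same source file in the coverage data.
    normalized = {os.path.normpath(p) for p in paths}
    assert normalized == {"/foo/bar/baz.c"}
    print(normalized)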