From 2df531c16083fbca748bc9f5d46432b1746d0fd9 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Mon, 9 Jun 2025 12:34:21 +0100 Subject: [PATCH 1/5] Delete unreachable code Change-Id: I76079a1c16cd29474908059ed057da559e4ad901 --- coverage-tool/coverage-reporting/intermediate_layer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 55474b9..60c8b3c 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -1,6 +1,6 @@ # !/usr/bin/env python ############################################################################### -# Copyright (c) 2020-2023, ARM Limited and Contributors. All rights reserved. +# Copyright (c) 2020-2025, ARM Limited and Contributors. All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause ############################################################################### @@ -218,10 +218,6 @@ def get_function_line_numbers(source_file: str) -> Dict[str, int]: except BaseException: logger.warning("Warning: Can't get all function line numbers from %s" % source_file) - except Exception as ex: - logger.warning(f"Warning: Unknown error '{ex}' when executing command " - f"'{command}'") - return {} return fln -- GitLab From f88a6782f0fe30818f7d862c2b69e5f1e33399db Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Mon, 9 Jun 2025 13:14:28 +0100 Subject: [PATCH 2/5] Optimize _process_fn_no_sources - find is faster than the -r option for grep, and its results can be cached. - It's faster to scan a file by hand and lookup all the function names at once than it is to invoke grep on it once per function name. Change-Id: I2888c60f019e218667b760f015bff86a09f11843 --- .../coverage-reporting/intermediate_layer.py | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 60c8b3c..21ba356 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -29,6 +29,8 @@ from typing import Generator from typing import Union from typing import Tuple import logging +import sys +from collections import defaultdict __version__ = "7.0" @@ -560,6 +562,24 @@ class CoverageHandler(object): return self._source_files +IDENTIFIER = re.compile(r'\b[A-Za-z_]\w*\b') + +def identifiers(filename): + with open(filename) as f: + for line in f: + for match in IDENTIFIER.finditer(line): + yield match.group() + +def locate_identifiers(ids, filenames): + """Map each identifier to the set of files in which it appears.""" + result = defaultdict(set) + for filename in filenames: + for s in identifiers(filename): + if s in ids: + result[s].add(filename) + return result + + class IntermediateCodeCoverage(object): """Class used to process the trace data along with the dwarf signature files to produce an intermediate layer in json with @@ -591,6 +611,7 @@ class IntermediateCodeCoverage(object): self.elf_map = {} # For elf custom mappings self.elf_custom = None + self.found_source_files = None def process(self): """ @@ -686,6 +707,15 @@ class IntermediateCodeCoverage(object): self._process_fn_no_sources(parser) return parser + def find_source_files(self): + # Cache the list of .c, .s and .S files found in the local workspace. 
+ if self.found_source_files is None: + self.found_source_files = [sys.intern(f) for f in subprocess.check_output([ + 'find', self.local_workspace, + '(', '-name', '*.c', '-o', '-name', '*.s', '-o', '-name', '*.S', ')', + '-type', 'f', '-print0']).decode('utf-8').rstrip('\0').split('\0')] + return self.found_source_files + def _process_fn_no_sources(self, parser: BinaryParser): """ Checks function coverage for functions with no dwarf signature i.e. @@ -701,17 +731,14 @@ class IntermediateCodeCoverage(object): traces_address_pointer = 0 _functions = parser.no_source_functions functions_addresses = sorted(_functions.keys()) + function_names = [v['name'] for v in _functions.values()] + hits = locate_identifiers(function_names, self.find_source_files()) address_size = 4 for start_address in functions_addresses: function_covered = False function_name = _functions[start_address]['name'] # Get all files in the source code where the function is defined - source_files = os_command("grep --include '*.c' --include '*.s' " - "--include '*.S' -nrw '{}' {}" - "| cut -d: -f1". - format(function_name, - self.local_workspace)) - unique_files = set(source_files.split()) + unique_files = hits[function_name] sources_found = [] for source_file in unique_files: line_number = parser.function_line_numbers.get_line_number( -- GitLab From ca980a7a6b2f23510b3da2446d00368229d45c10 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Tue, 10 Jun 2025 17:20:35 +0100 Subject: [PATCH 3/5] Optimize parsing identifiers from source file - It's faster to scan the whole file at once rather than one line at a time. Change-Id: Ia85eb1bebd59ce1174ba766ed1342ed665e0bdf3 --- coverage-tool/coverage-reporting/intermediate_layer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 21ba356..3756c67 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -566,9 +566,8 @@ IDENTIFIER = re.compile(r'\b[A-Za-z_]\w*\b') def identifiers(filename): with open(filename) as f: - for line in f: - for match in IDENTIFIER.finditer(line): - yield match.group() + for match in IDENTIFIER.finditer(f.read()): + yield match.group() def locate_identifiers(ids, filenames): """Map each identifier to the set of files in which it appears.""" -- GitLab From 0456434442b5e6286d50384fdd5d74bc9fd57089 Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Tue, 10 Jun 2025 17:23:59 +0100 Subject: [PATCH 4/5] Simplify and slightly speed up find command line - Fix -print0 ugliness - Send stderr to /dev/null instead of the console - find can be chatty. 
Change-Id: I061d750bc3c9deae393a2f76594beda1d7c516c1 --- coverage-tool/coverage-reporting/intermediate_layer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index 3756c67..bac53bc 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -712,7 +712,9 @@ class IntermediateCodeCoverage(object): self.found_source_files = [sys.intern(f) for f in subprocess.check_output([ 'find', self.local_workspace, '(', '-name', '*.c', '-o', '-name', '*.s', '-o', '-name', '*.S', ')', - '-type', 'f', '-print0']).decode('utf-8').rstrip('\0').split('\0')] + '-type', 'f'], + universal_newlines=True, + stderr=subprocess.DEVNULL).splitlines() ] return self.found_source_files def _process_fn_no_sources(self, parser: BinaryParser): -- GitLab From 36a799c55dd6941221f228f387bb6581baae66df Mon Sep 17 00:00:00 2001 From: Sean Fitzgibbon Date: Wed, 11 Jun 2025 09:33:07 +0100 Subject: [PATCH 5/5] Optimize ctags calls It's expensive to call ctags once per source file identified in the DWARF data. This patch: - Calls ctags on all the source files from a single ELF at once. '-L -' is used to avoid risk of hitting OS limits with a huge command line. - Similarly, reads the output a line at a time to avoid holding a huge amount of ctags output at once in memory. - Recovers the file paths from the lines output by ctags, which it always prints in '-x' mode, even for a single input file. (Note that this will break on paths with embedded spaces. This limitation was present in the original through the use of shell=True, so it is assumed to be acceptable here - consider json output if it becomes a problem). - Renames 'workspace' to 'local_workspace' in FunctionLineNumbers for consistency with the rest of the program. (For programming convenience, the cache of results is changed to key by local file name - i.e. with local_workspace prepended - rather than the caller's file name, but is otherwise unchanged). Note the apparent loss of specificity in the error message for ctags failure, which no longer outputs each file name. This was probably intended to report problems with particular missing, corrupted, or unreadable files. In practice, Exuberant ctags returns success in these cases, even for a single file on the command line. The only way I found to provoke a nonzero exit code was to pass a bad command line argument. The point is that ctags will either fail for every file or none of them, so testing each exit code is not effective. If file-specific errors need to be reported, consider capturing and logging lines of stderr. 
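For reference, the parsing relies on the fixed column order of Exuberant ctags
'-x' cross-reference output, which looks roughly like the lines below (the
names, paths and line numbers here are purely illustrative, not taken from any
real run):

  bl31_main        function     98 bl31/bl31_main.c   void bl31_main(void)
  func             label        34 lib/entry.S        func handle_irq

Tag name, kind, line number and file path are the first four
whitespace-separated columns, which is also why file paths containing spaces
cannot be recovered from this format.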
Change-Id: I89c80a8ac181d6bc01351c0f4922941829a1ed5c --- .../coverage-reporting/intermediate_layer.py | 110 +++++++++++------- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py index bac53bc..18507d5 100644 --- a/coverage-tool/coverage-reporting/intermediate_layer.py +++ b/coverage-tool/coverage-reporting/intermediate_layer.py @@ -28,6 +28,7 @@ from typing import List from typing import Generator from typing import Union from typing import Tuple +from typing import Iterable import logging import sys from collections import defaultdict @@ -195,57 +196,82 @@ def remove_workspace(path, workspace): return ret -def get_function_line_numbers(source_file: str) -> Dict[str, int]: +def get_function_line_numbers(source_file_list: Iterable[str]) -> Dict[str, Dict[str, int]]: """ Using ctags get all the function names with their line numbers - within the source_file + within each source file. - :return: Dictionary with function name as key and line number as value + :return: Dictionary with source file name as key. Value is a dictionary with + function name as key and line number as value. """ - command = "ctags -x --c-kinds=f {}".format(source_file) - fln = {} + fln = defaultdict(dict) try: - function_lines = os_command(command).split("\n") - for line in function_lines: + proc = subprocess.Popen( + ['ctags', '--c-kinds=f', '-x', '-L', '-'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + universal_newlines=True # aka 'text' keyword parameter + ) + + proc.stdin.write('\n'.join(source_file_list) + '\n') + proc.stdin.close() + + for line in proc.stdout: cols = line.split() - if len(cols) < 3: + if len(cols) < 4: continue + line_no = int(cols[2]) + source_file = cols[3] if cols[1] == "function": - fln[cols[0]] = int(cols[2]) + fln[source_file][cols[0]] = line_no elif cols[1] == "label": if cols[0] == "func": - fln[cols[-1]] = int(cols[2]) + fln[source_file][cols[-1]] = line_no elif cols[0] + ":" == cols[-1]: - fln[cols[0]] = int(cols[2]) + fln[source_file][cols[0]] = line_no + + if proc.wait() != 0: + raise Exception() + except BaseException: - logger.warning("Warning: Can't get all function line numbers from %s" % - source_file) + logger.warning("Warning: Can't get all function line numbers") return fln class FunctionLineNumbers(object): - """Helper class used to get a function start line number within - a source code file""" + """Helper class used to get all the source code files and start line + numbers for a function""" - def __init__(self, workspace: str): + def __init__(self, local_workspace: str): """ Initialise dictionary to allocate source code files with the corresponding function start line numbers. 
- :param workspace: The folder where the source files are deployed + :param local_workspace: The folder where the source files are deployed """ - self.filenames = {} - self.workspace = workspace + self.filenames = {} # Keys are file paths in the local workspace, where we run ctags + self.local_workspace = local_workspace - def get_line_number(self, filename: str, function_name: str) -> int: + def get_line_numbers(self, filenames: List[str], function_name: str) -> Dict[str, int]: if not FUNCTION_LINES_ENABLED: - return 0 - if filename not in self.filenames: - source_file = os.path.join(self.workspace, filename) - # Get all functions with their lines in the source file - self.filenames[filename] = get_function_line_numbers(source_file) - return 0 if function_name not in self.filenames[filename] else \ - self.filenames[filename][function_name] + return {} + + # Input filenames are relative to the workspace and need adjusting for + # the local_workspace. + def localize(filename): + return os.path.join(self.local_workspace, filename) + + local_filenames = { localize(f) for f in filenames } + + missing = local_filenames - self.filenames.keys() + if missing: + self.filenames.update(get_function_line_numbers(missing)) + + # A dict with the input file name as key and the line number as value. + return { f : self.filenames[localize(f)][function_name] + for f in filenames + if function_name in self.filenames.get(localize(f), {}) } class BinaryParser(object): @@ -472,11 +498,12 @@ class BinaryParser(object): source_code_block.source_file, self.workspace) yield source_code_block - def get_function_block(self) -> Generator['BinaryParser.FunctionBlock', - None, None]: - """Generator method to obtain all the function blocks contained in + def get_function_blocks(self) -> List['BinaryParser.FunctionBlock']: + """Get list of all the function blocks contained in the binary dump file. 
""" + result = [] + filenames = [] for function_block in BinaryParser.FunctionBlock.get(self.dump): if function_block.source_file is None: logger.warning(f"Source file not found for function " @@ -485,10 +512,14 @@ class BinaryParser(object): if self.remove_workspace: function_block.source_file = remove_workspace( function_block.source_file, self.workspace) - function_block.function_line_number = \ - self.function_line_numbers.get_line_number( - function_block.source_file, function_block.name) - yield function_block + + filenames.append(function_block.source_file) + result.append(function_block) + + for function_block in result: + line_numbers_by_file = self.function_line_numbers.get_line_numbers(filenames, function_block.name) + function_block.function_line_number = line_numbers_by_file.get(function_block.source_file, 0) + return result class CoverageHandler(object): @@ -673,7 +704,7 @@ class IntermediateCodeCoverage(object): self.local_workspace) total_number_functions = 0 functions_covered = 0 - for function_block in parser.get_function_block(): + for function_block in parser.get_function_blocks(): total_number_functions += 1 # Function contains source code self.coverage.add_function_coverage(function_block) @@ -740,12 +771,9 @@ class IntermediateCodeCoverage(object): function_name = _functions[start_address]['name'] # Get all files in the source code where the function is defined unique_files = hits[function_name] - sources_found = [] - for source_file in unique_files: - line_number = parser.function_line_numbers.get_line_number( - source_file, function_name) - if line_number > 0: - sources_found.append((source_file, line_number)) + line_numbers_by_file = parser.function_line_numbers.get_line_numbers( + unique_files, function_name) + sources_found = list(line_numbers_by_file.items()) if len(sources_found) == 0: logger.debug(f"'{function_name}' not found in sources") elif len(sources_found) > 1: -- GitLab