From ba886b4e592778393a598a38302bbde63c443192 Mon Sep 17 00:00:00 2001 From: Jakub Sujak Date: Fri, 3 Jan 2025 10:46:24 +0000 Subject: [PATCH 1/2] Add Copyright year checker script and pre-commit hook * Lint copyright headers for the current year and expected copyright year range format. * Automatically change copyright years if not up-to-date. * Add pre-commit hook that runs the linter on committed files. Signed-off-by: Jakub Sujak --- .pre-commit-config.yaml | 11 ++ tools/pre-commit/copyright_year_checker.py | 162 +++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 tools/pre-commit/copyright_year_checker.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4254f516..eae80e24 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -88,3 +88,14 @@ repos: description: Ensures that latest commit has been signed-off with `--signoff`. pass_filenames: false stages: [ pre-commit, pre-push ] + - id: copyright-year-checker + alias: copyright-year-checker + name: "Copyright Year Checker" + entry: python tools/pre-commit/copyright_year_checker.py + always_run: true + language: python + language_version: python3 + description: Ensures committed files include the current year in the copyright header. + pass_filenames: true + stages: [ pre-commit ] + diff --git a/tools/pre-commit/copyright_year_checker.py b/tools/pre-commit/copyright_year_checker.py new file mode 100644 index 00000000..70b37e15 --- /dev/null +++ b/tools/pre-commit/copyright_year_checker.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +# +# SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 +# +"""Checks committed files include the current year in the copyright header. + +This script is intended to be run as part of the pre-commit hooks. 
+""" +import argparse +import logging +import re +import subprocess +import sys +from datetime import datetime +from pathlib import Path + +logger = logging.getLogger("Copyright Year Checker") + + +class CopyrightYearChecker: + """A class that checks if committed files contain the current year in the copyright header.""" + + def __init__(self, args): + # The project root path is used as the current working directory for all read/write operations in the checker. + self.dir = Path(__file__).parents[2].resolve() + self.files = args.files + + def run(self) -> int: + """Runs the checker. + + Raises + ------ + ValueError + If the checker fails to execute. + """ + logger.debug(f"Running Copyright Year checker in '{self.dir}'") + + commit_msg_cmd = ["git", "show", "-s", "--format=%B", "HEAD"] + commit_msg = subprocess.check_output(commit_msg_cmd, cwd=self.dir).decode( + "utf-8" + ) + + if "@arm.com" in commit_msg: + for file in self.files: + file = self.dir / file + + needs_year_update = False + + with open(file, "r") as f: + logger.debug(f"Reading file '{file}'") + + lines = f.readlines() + + for i, line in enumerate(lines): + spdx_pattern = re.compile( + r"(?P<spdx>.*SPDX-FileCopyrightText: Copyright )" + r"(?P<years>.*?)" + r"(?P<author> Arm Limited and/or its affiliates \n$)" + ) + spdx_match = re.match(spdx_pattern, line) + + if spdx_match: + matched_years = spdx_match.group("years") + matched_years = matched_years.replace(" ", "").split(",") + logger.debug(f"Matched Copyright years '{matched_years}'") + + expected_years = matched_years + + current_year = datetime.now().year + previous_year = current_year - 1 + last_updated_year = int(matched_years[-1][-4:]) + + assert isinstance(current_year, int) + assert isinstance(previous_year, int) + assert isinstance(last_updated_year, int) + + logger.debug(f"Current year: {current_year}") + logger.debug(f"Previous year: {previous_year}") + logger.debug(f"Last updated year: {last_updated_year}") + + if last_updated_year > current_year: + raise 
ValueError( + "Copyright year cannot be greater than current year." + ) + elif last_updated_year == current_year: + logger.info("Copyright years OK") + return 0 + elif last_updated_year == previous_year: + # If it's a year range (e.g. 2023-2024), then update to the current year (becomes 2023-2025). + if "-" in matched_years[-1]: + expected_years[-1] = expected_years[-1].replace( + str(previous_year), str(current_year) + ) + # Otherwise, for standalone years (e.g. 2024) construct a year range with the + # current year (becomes 2024-2025). + else: + expected_years[-1] = ( + f"{expected_years[-1]}-{current_year}" + ) + else: # Missing current year, so add it. + expected_years.append(str(current_year)) + + logger.debug(f"Expected Copyright years {expected_years}") + needs_year_update = True + + copyright_years = "" + for year in expected_years: + copyright_years += year + if year != expected_years[-1]: + copyright_years += ", " + logger.debug(f"Updated years '{copyright_years}'") + + copyright_line = ( + spdx_match.group("spdx") + + copyright_years + + spdx_match.group("author") + ) + lines[i] = copyright_line + + if needs_year_update: + with open(file, "w") as f: + logger.debug(f"Writing to file '{file}'") + for line in lines: + f.write(line) + return 0 + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument( + "files", help="Files to pass to the checker", type=str, nargs="+" + ) + parser.add_argument( + "--debug", + "-D", + help="Enable debug information. 
Default: False", + action="store_true", + default=False, + ) + args = parser.parse_args() + + return args + + +def run_copyright_year_checker(args): + logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) + logger.debug(f"Arguments passed: {str(args.__dict__)}") + + try: + checker = CopyrightYearChecker(args) + retval = checker.run() + except ValueError as e: + logger.error("Exception caught in Copyright Year checker: %s" % e) + retval = 1 + + return retval + + +if __name__ == "__main__": + sys.exit(run_copyright_year_checker(parse_arguments())) -- GitLab From 4ef4fedab7be466918902183bffc9a34500d135b Mon Sep 17 00:00:00 2001 From: Jakub Sujak Date: Fri, 3 Jan 2025 11:33:52 +0000 Subject: [PATCH 2/2] Use git cached files Signed-off-by: Jakub Sujak --- .pre-commit-config.yaml | 4 +-- tools/pre-commit/copyright_year_checker.py | 39 ++++++++++++++++++++-- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eae80e24..a48891a4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -91,11 +91,11 @@ repos: - id: copyright-year-checker alias: copyright-year-checker name: "Copyright Year Checker" - entry: python tools/pre-commit/copyright_year_checker.py + entry: python tools/pre-commit/copyright_year_checker.py --use-git-cache always_run: true language: python language_version: python3 description: Ensures committed files include the current year in the copyright header. - pass_filenames: true + pass_filenames: false stages: [ pre-commit ] diff --git a/tools/pre-commit/copyright_year_checker.py b/tools/pre-commit/copyright_year_checker.py index 70b37e15..35f2285d 100644 --- a/tools/pre-commit/copyright_year_checker.py +++ b/tools/pre-commit/copyright_year_checker.py @@ -25,7 +25,7 @@ class CopyrightYearChecker: def __init__(self, args): # The project root path is used as the current working directory for all read/write operations in the checker. 
self.dir = Path(__file__).parents[2].resolve() - self.files = args.files + self.use_git_cache = args.use_git_cache def run(self) -> int: """Runs the checker. @@ -43,7 +43,37 @@ class CopyrightYearChecker: ) if "@arm.com" in commit_msg: - for file in self.files: + # Get a list of committed files, ignoring any deleted files + if self.use_git_cache: + # Use git cache for running in pre-commit hooks. + commit_files_cmd = [ + "git", + "diff", + "--cached", + "--name-only", + "--diff-filter=d", + "-r", + "HEAD", + ] + else: + # For running the script outside pre-commit hooks. + commit_files_cmd = [ + "git", + "diff-tree", + "--name-only", + "--no-commit-id", + "--diff-filter=d", + "-r", + "HEAD", + ] + + files = subprocess.check_output(commit_files_cmd, cwd=self.dir).decode( + "utf-8" + ) + files = files.split("\n") + files = list(filter(None, files)) + + for file in files: file = self.dir / file needs_year_update = False @@ -130,7 +160,10 @@ class CopyrightYearChecker: def parse_arguments(): parser = argparse.ArgumentParser() parser.add_argument( - "files", help="Files to pass to the checker", type=str, nargs="+" + "--use-git-cache", + help="Use git cached files for use in pre-commit hooks. Default: False", + action="store_true", + default=False, ) parser.add_argument( "--debug", -- GitLab