diff options
author | Keith Seitz <keiths@redhat.com> | 2025-04-29 09:08:38 -0700 |
---|---|---|
committer | Keith Seitz <keiths@redhat.com> | 2025-04-29 09:08:38 -0700 |
commit | a7175864d96765b291276cf3a6508b78ad3a9b23 (patch) | |
tree | d54b9427d564efe5a911213a6f6ffbe87524479b | |
parent | f79a8e5aab2d0f4773cca8c33434c5cf0f3e9124 (diff) | |
download | binutils-a7175864d96765b291276cf3a6508b78ad3a9b23.zip binutils-a7175864d96765b291276cf3a6508b78ad3a9b23.tar.gz binutils-a7175864d96765b291276cf3a6508b78ad3a9b23.tar.bz2 |
[gdb/contrib] Add script to license check new files
While reading through gdb-patches backlog after a return
from PTO, I noticed that a newly added file was licensed
with "MIT", and that license was not listed in Fedora's
gdb.spec file. [Fedora no longer supports "effective"
licenses.]
That led me to this simple script which generates a list
of all the newly added files between two given commits and
scans these files for licenses.
Example usage:
bash$ cd /path/to/binutils-gdb/gdb
bash$ ./contrib/license-check-new-files.sh -s gdb-15-branchpoint gdb-16-branchpoint
Scanning directories gdb*/...
gdb/contrib/common-misspellings.txt: no longer in repo?
gdb/contrib/spellcheck.sh: no longer in repo?
gdbsupport/unordered_dense.h: MIT
I don't think anything in here is Fedora- or RPM-specific,
so I'd like to submit this for consideration for inclusion
in contrib/. I believe other distros may find it useful.
Approved-By: Tom Tromey <tom@tromey.com>
-rwxr-xr-x | gdb/contrib/license-check-new-files.sh | 149 |
1 files changed, 149 insertions, 0 deletions
diff --git a/gdb/contrib/license-check-new-files.sh b/gdb/contrib/license-check-new-files.sh new file mode 100755 index 0000000..710afa1 --- /dev/null +++ b/gdb/contrib/license-check-new-files.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2025 Free Software Foundation, Inc. +# +# This file is part of GDB. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# This program requires the python modules GitPython (git) and scancode-toolkit. +# It builds a list of all the newly added files to the repository and scans +# each file for a license, printing it to the terminal. If "--skip" is used, +# it will only output non-"common" licenses, e.g., omitting "GPL-3.0-or-later". +# This makes it a little bit easier to detect any possible new licenses. +# +# Example: +# bash$ cd /path/to/binutils-gdb/gdb +# bash$ ./contrib/license-check-new-files.sh -s gdb-15-branchpoint gdb-16-branchpoint +# Scanning directories gdb*/... +# gdb/contrib/common-misspellings.txt: no longer in repo? +# gdb/contrib/spellcheck.sh: no longer in repo? +# gdbsupport/unordered_dense.h: MIT + +import os +import sys +import argparse +from pathlib import PurePath +from git import Repo +from scancode import api + +# A list of "common" licenses. If "--skip" is used, any file +# with a license in this list will be omitted from the output. 
COMMON_LICENSES = ["GPL-2.0-or-later", "GPL-3.0-or-later"]

# Default list of directories to scan.  Default scans are limited to
# gdb-specific git directories because much of the rest of binutils-gdb
# is actually owned by other projects/packages.
DEFAULT_SCAN_DIRS = "gdb*"


# Get the commit object associated with the string commit CSTR
# from the git repository REPO.
#
# Returns the object, or prints an error and exits with status 2 if
# the lookup fails.
def get_commit(repo, cstr):
    try:
        return repo.commit(cstr)
    except Exception:
        # Deliberately not a bare "except:": KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as an
        # unknown commit.
        print(f'unknown commit "{cstr}"')
        sys.exit(2)


# Uses scancode-toolkit package to scan FILE's licenses.
# Returns the full license dict from scancode on success or
# propagates any exceptions.
def get_licenses_for_file(file):
    return api.get_licenses(file)


# Helper function to print FILE to the terminal if skipping
# common licenses.  When SKIP is false nothing is printed, because
# main has already printed a "checking licenses for ..." prefix.
def skip_print_file(skip, file):
    if skip:
        print(f"{file}: ", end="")


# Locate the git repository containing the directory START (a PurePath)
# by trying START itself and then each of its parents in turn.
#
# Returns a (repo, root) tuple where ROOT is the directory in which the
# repository was found, or prints an error and exits with status 2 if no
# candidate directory could be opened as a repository.
def _find_repo(start):
    candidates = [start, *start.parents]
    for candidate in candidates:
        try:
            return Repo(candidate), candidate
        except Exception:
            # Best-effort probe: this directory is not a usable
            # repository, so keep walking towards the root.
            continue

    # Every candidate failed.  Deciding this here, rather than comparing
    # the last directory tried against the root after the loop, means a
    # repository located at the root is accepted and running from the
    # root directory itself does not raise IndexError.
    top = candidates[-1]
    print(f'not a git repository (or any parent up to mount point "{top}")')
    sys.exit(2)


# Script entry point.  ARGV is the full argument vector, including the
# program name in ARGV[0].
#
# Lists the files added between the two given commits, restricted to the
# requested paths, and prints the license scancode detects for each one.
# With "--skip", files whose license is in COMMON_LICENSES are omitted.
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("from_commit")
    parser.add_argument("to_commit")
    parser.add_argument(
        "-s", "--skip", help="skip common licenses in output", action="store_true"
    )
    parser.add_argument(
        "-p",
        "--paths",
        help=f'paths to scan (default is "{DEFAULT_SCAN_DIRS}")',
        type=str,
        default=DEFAULT_SCAN_DIRS,
    )
    # Parse the vector we were handed instead of silently falling back
    # to sys.argv, so the ARGV parameter is actually honoured.
    args = parser.parse_args(argv[1:])

    # Find the repository, starting at the current directory and
    # looping up to the root.
    repo, root = _find_repo(PurePath(os.getcwd()))

    # Commit boundaries to search for new files.
    fc = get_commit(repo, args.from_commit)
    tc = get_commit(repo, args.to_commit)

    # Loop over new files.
    scan_paths = args.paths.split(",")
    listing = ",".join(f"{s}/" for s in scan_paths)
    print(f"Scanning directories {listing}...")
    for added in fc.diff(tc, paths=scan_paths).iter_change_type("A"):
        filename = added.a_path
        if not args.skip:
            print(f"checking licenses for {filename}... ", end="", flush=True)
        try:
            lic = get_licenses_for_file(root.joinpath(filename).as_posix())
            # NOTE(review): more than one entry in "license_clues" is
            # treated as "multiple licenses" -- confirm against the
            # scancode API that this is the intended key.
            if len(lic["license_clues"]) > 1:
                # In skip mode no "checking ..." prefix was printed, so
                # name the file here or the message is unattributable.
                skip_print_file(args.skip, filename)
                print("multiple licenses detected")
            else:
                spdx = lic["detected_license_expression_spdx"]
                if not args.skip or spdx not in COMMON_LICENSES:
                    skip_print_file(args.skip, filename)
                    print(f"{spdx}")
        except OSError:
            # Likely hit a file that was added to the repo and subsequently removed.
            skip_print_file(args.skip, filename)
            print("no longer in repo?")
        except KeyboardInterrupt:
            print("interrupted")
            break
        except Exception as e:
            # If scanning fails, there is little we can do but print an error.
            skip_print_file(args.skip, filename)
            print(e)


if __name__ == "__main__":
    main(sys.argv)