diff options
author | Martin Liska <mliska@suse.cz> | 2020-05-13 14:27:30 +0200 |
---|---|---|
committer | Martin Liska <mliska@suse.cz> | 2020-05-13 14:27:30 +0200 |
commit | c10aa1f07366732b22a8dc1cb0c63914e91b0434 (patch) | |
tree | da7a74d1d2b274d68a1657308fc9e6f83bcda5a4 /contrib/gcc-changelog/git_commit.py | |
parent | 18edc195442291525e04f0fa4d5ef972155117da (diff) | |
download | gcc-c10aa1f07366732b22a8dc1cb0c63914e91b0434.zip gcc-c10aa1f07366732b22a8dc1cb0c63914e91b0434.tar.gz gcc-c10aa1f07366732b22a8dc1cb0c63914e91b0434.tar.bz2 |
Add gcc-changelog related scripts.
* gcc-changelog/git_check_commit.py: New file.
* gcc-changelog/git_commit.py: New file.
* gcc-changelog/git_email.py: New file.
* gcc-changelog/git_repository.py: New file.
* gcc-changelog/git_update_version.py: New file.
Diffstat (limited to 'contrib/gcc-changelog/git_commit.py')
-rwxr-xr-x | contrib/gcc-changelog/git_commit.py | 536 |
1 files changed, 536 insertions, 0 deletions
diff --git a/contrib/gcc-changelog/git_commit.py b/contrib/gcc-changelog/git_commit.py new file mode 100755 index 0000000..a434f94 --- /dev/null +++ b/contrib/gcc-changelog/git_commit.py @@ -0,0 +1,536 @@ +#!/usr/bin/env python3 +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. */ + +import os +import re + +changelog_locations = set([ + 'config', + 'contrib', + 'contrib/header-tools', + 'contrib/reghunt', + 'contrib/regression', + 'fixincludes', + 'gcc/ada', + 'gcc/analyzer', + 'gcc/brig', + 'gcc/c', + 'gcc/c-family', + 'gcc', + 'gcc/cp', + 'gcc/d', + 'gcc/fortran', + 'gcc/go', + 'gcc/jit', + 'gcc/lto', + 'gcc/objc', + 'gcc/objcp', + 'gcc/po', + 'gcc/testsuite', + 'gnattools', + 'gotools', + 'include', + 'intl', + 'libada', + 'libatomic', + 'libbacktrace', + 'libcc1', + 'libcpp', + 'libcpp/po', + 'libdecnumber', + 'libffi', + 'libgcc', + 'libgcc/config/avr/libf7', + 'libgcc/config/libbid', + 'libgfortran', + 'libgomp', + 'libhsail-rt', + 'libiberty', + 'libitm', + 'libobjc', + 'liboffloadmic', + 'libphobos', + 'libquadmath', + 'libsanitizer', + 'libssp', + 'libstdc++-v3', + 'libvtv', + 'lto-plugin', + 'maintainer-scripts', + 'zlib']) + +bug_components = set([ + 'ada', + 'analyzer', + 'boehm-gc', + 'bootstrap', + 'c', + 'c++', + 'd', + 'debug', + 'demangler', + 'driver', + 'fastjar', + 'fortran', + 'gcov-profile', + 'go', + 'hsa', + 'inline-asm', + 'ipa', + 'java', + 'jit', + 'libbacktrace', + 'libf2c', + 'libffi', + 'libfortran', + 'libgcc', + 'libgcj', + 'libgomp', + 'libitm', + 'libobjc', + 'libquadmath', + 'libstdc++', + 'lto', + 'middle-end', + 'modula2', + 'objc', + 'objc++', + 'other', + 'pch', + 'pending', + 'plugins', + 'preprocessor', + 'regression', + 'rtl-optimization', + 'sanitizer', + 'spam', + 'target', + 'testsuite', + 'translation', + 'tree-optimization', + 'web']) + +ignored_prefixes = [ + 'gcc/d/dmd/', + 'gcc/go/frontend/', + 'libgo/', + 'libphobos/libdruntime', + 'libphobos/src/', + 'libsanitizer/', + ] + +misc_files = [ + 'gcc/DATESTAMP', + 'gcc/BASE-VER', + 'gcc/DEV-PHASE' + ] + +author_line_regex = \ + re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.* <.*>)') +additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.* <.*>)') +changelog_regex = re.compile(r'^([a-z0-9+-/]*)/ChangeLog:?') +pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?([0-9]+)$') +star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)') + +LINE_LIMIT = 100 +TAB_WIDTH = 8 +CO_AUTHORED_BY_PREFIX = 'co-authored-by: ' + + +class Error: + def __init__(self, message, line=None): + self.message = message + self.line = line + + def __repr__(self): + s = self.message + if self.line: + s += ':"%s"' % self.line + return s + + +class ChangeLogEntry: + def __init__(self, folder, authors, prs): + self.folder = folder + # Python2 has not 'copy' function + self.author_lines = list(authors) + self.initial_prs = list(prs) + self.prs = list(prs) + self.lines = [] + + @property + def files(self): + files = [] + for line in self.lines: + m = star_prefix_regex.match(line) + if m: + line = m.group('content') + if '(' in line: + line = line[:line.index('(')] + if ':' in line: + line = line[:line.index(':')] + for file in line.split(','): + file = file.strip() + if file: + files.append(file) + return files + + @property + def datetime(self): + for author in self.author_lines: + if author[1]: + return author[1] + return None + + @property + def authors(self): + return [author_line[0] for author_line in self.author_lines] + + @property + def is_empty(self): + return not self.lines and self.prs == self.initial_prs + + +class GitCommit: + def __init__(self, hexsha, date, author, body, modified_files, + strict=True): + self.hexsha = hexsha + self.lines = body + self.modified_files = modified_files + self.message = None + self.changes = None + self.changelog_entries = [] + self.errors = [] + self.date = date + self.author = author + self.top_level_authors = [] + self.co_authors = [] + self.top_level_prs = [] + + project_files = [f for f in self.modified_files + if self.is_changelog_filename(f[0]) + or f[0] in misc_files] + if len(project_files) == len(self.modified_files): + # All modified files are only MISC files + return + elif project_files and strict: + self.errors.append(Error('ChangeLog, DATESTAMP, BASE-VER and ' + 'DEV-PHASE updates should be done ' + 'separately from normal commits')) + return + + self.parse_lines() + if self.changes: + self.parse_changelog() + self.deduce_changelog_locations() + if not self.errors: + self.check_mentioned_files() + self.check_for_correct_changelog() + + @property + def success(self): + return not self.errors + + @property + def new_files(self): + return [x[0] for x in self.modified_files if x[1] == 'A'] + + @classmethod + def is_changelog_filename(cls, path): + return path.endswith('/ChangeLog') or path == 'ChangeLog' + + @classmethod + def find_changelog_location(cls, name): + if name.startswith('\t'): + name = name[1:] + if name.endswith(':'): + name = name[:-1] + if name.endswith('/'): + name = name[:-1] + return name if name in changelog_locations else None + + @classmethod + def format_git_author(cls, author): + assert '<' in author + return author.replace('<', ' <') + + @classmethod + def parse_git_name_status(cls, string): + modified_files = [] + for entry in string.split('\n'): + parts = entry.split('\t') + t = parts[0] + if t == 'A' or t == 'D' or t == 'M': + modified_files.append((parts[1], t)) + elif t == 'R': + modified_files.append((parts[1], 'D')) + modified_files.append((parts[2], 'A')) + return modified_files + + def parse_lines(self): + body = self.lines + + for i, b in enumerate(body): + if not b: + continue + if (changelog_regex.match(b) or self.find_changelog_location(b) + or star_prefix_regex.match(b) or pr_regex.match(b) + or author_line_regex.match(b)): + self.changes = body[i:] + return + self.errors.append(Error('cannot find a ChangeLog location in ' + 'message')) + + def parse_changelog(self): + last_entry = None + will_deduce = False + for line in self.changes: + if not line: + if last_entry and will_deduce: + last_entry = None + continue + if line != line.rstrip(): + self.errors.append(Error('trailing whitespace', line)) + if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT: + self.errors.append(Error('line limit exceeds %d characters' + % LINE_LIMIT, line)) + m = changelog_regex.match(line) + if m: + last_entry = ChangeLogEntry(m.group(1), self.top_level_authors, + self.top_level_prs) + self.changelog_entries.append(last_entry) + elif self.find_changelog_location(line): + last_entry = ChangeLogEntry(self.find_changelog_location(line), + self.top_level_authors, + self.top_level_prs) + self.changelog_entries.append(last_entry) + else: + author_tuple = None + pr_line = None + if author_line_regex.match(line): + m = author_line_regex.match(line) + author_tuple = (m.group('name'), m.group('datetime')) + elif additional_author_regex.match(line): + m = additional_author_regex.match(line) + if len(m.group('spaces')) != 4: + msg = 'additional author must prepend with tab ' \ + 'and 4 spaces' + self.errors.append(Error(msg, line)) + else: + author_tuple = (m.group('name'), None) + elif pr_regex.match(line): + component = pr_regex.match(line).group('component') + if not component: + self.errors.append(Error('missing PR component', line)) + continue + elif not component[:-1] in bug_components: + self.errors.append(Error('invalid PR component', line)) + continue + else: + pr_line = line.lstrip() + + if line.lower().startswith(CO_AUTHORED_BY_PREFIX): + name = line[len(CO_AUTHORED_BY_PREFIX):] + author = self.format_git_author(name) + self.co_authors.append(author) + continue + + # ChangeLog name will be deduced later + if not last_entry: + if author_tuple: + self.top_level_authors.append(author_tuple) + continue + elif pr_line: + # append to top_level_prs only when we haven't met + # a ChangeLog entry + if (pr_line not in self.top_level_prs + and not self.changelog_entries): + self.top_level_prs.append(pr_line) + continue + else: + last_entry = ChangeLogEntry(None, + self.top_level_authors, + self.top_level_prs) + self.changelog_entries.append(last_entry) + will_deduce = True + elif author_tuple: + last_entry.author_lines.append(author_tuple) + continue + + if not line.startswith('\t'): + err = Error('line should start with a tab', line) + self.errors.append(err) + elif pr_line: + last_entry.prs.append(pr_line) + else: + m = star_prefix_regex.match(line) + if m: + if len(m.group('spaces')) != 1: + err = Error('one space should follow asterisk', + line) + self.errors.append(err) + else: + last_entry.lines.append(line) + else: + if last_entry.is_empty: + msg = 'first line should start with a tab, ' \ + 'asterisk and space' + self.errors.append(Error(msg, line)) + else: + last_entry.lines.append(line) + + def get_file_changelog_location(self, changelog_file): + for file in self.modified_files: + if file[0] == changelog_file: + # root ChangeLog file + return '' + index = file[0].find('/' + changelog_file) + if index != -1: + return file[0][:index] + return None + + def deduce_changelog_locations(self): + for entry in self.changelog_entries: + if not entry.folder: + changelog = None + for file in entry.files: + location = self.get_file_changelog_location(file) + if (location == '' + or (location and location in changelog_locations)): + if changelog and changelog != location: + msg = 'could not deduce ChangeLog file, ' \ + 'not unique location' + self.errors.append(Error(msg)) + return + changelog = location + if changelog is not None: + entry.folder = changelog + else: + msg = 'could not deduce ChangeLog file' + self.errors.append(Error(msg)) + + @classmethod + def in_ignored_location(cls, path): + for ignored in ignored_prefixes: + if path.startswith(ignored): + return True + return False + + @classmethod + def get_changelog_by_path(cls, path): + components = path.split('/') + while components: + if '/'.join(components) in changelog_locations: + break + components = components[:-1] + return '/'.join(components) + + def check_mentioned_files(self): + folder_count = len([x.folder for x in self.changelog_entries]) + assert folder_count == len(self.changelog_entries) + + mentioned_files = set() + for entry in self.changelog_entries: + if not entry.files: + msg = 'ChangeLog must contain a file entry' + self.errors.append(Error(msg, entry.folder)) + assert not entry.folder.endswith('/') + for file in entry.files: + if not self.is_changelog_filename(file): + mentioned_files.add(os.path.join(entry.folder, file)) + + cand = [x[0] for x in self.modified_files + if not self.is_changelog_filename(x[0])] + changed_files = set(cand) + for file in sorted(mentioned_files - changed_files): + self.errors.append(Error('file not changed in a patch', file)) + for file in sorted(changed_files - mentioned_files): + if not self.in_ignored_location(file): + if file in self.new_files: + changelog_location = self.get_changelog_by_path(file) + # Python2: we cannot use next(filter(...)) + entries = filter(lambda x: x.folder == changelog_location, + self.changelog_entries) + entries = list(entries) + entry = entries[0] if entries else None + if not entry: + prs = self.top_level_prs + if not prs: + # if all ChangeLog entries have identical PRs + # then use them + prs = self.changelog_entries[0].prs + for entry in self.changelog_entries: + if entry.prs != prs: + prs = [] + break + entry = ChangeLogEntry(changelog_location, + self.top_level_authors, + prs) + self.changelog_entries.append(entry) + # strip prefix of the file + assert file.startswith(entry.folder) + file = file[len(entry.folder):].lstrip('/') + entry.lines.append('\t* %s: New file.' % file) + else: + msg = 'changed file not mentioned in a ChangeLog' + self.errors.append(Error(msg, file)) + + def check_for_correct_changelog(self): + for entry in self.changelog_entries: + for file in entry.files: + full_path = os.path.join(entry.folder, file) + changelog_location = self.get_changelog_by_path(full_path) + if changelog_location != entry.folder: + msg = 'wrong ChangeLog location "%s", should be "%s"' + err = Error(msg % (entry.folder, changelog_location), file) + self.errors.append(err) + + def to_changelog_entries(self): + for entry in self.changelog_entries: + output = '' + timestamp = entry.datetime + if not timestamp: + timestamp = self.date.strftime('%Y-%m-%d') + authors = entry.authors if entry.authors else [self.author] + # add Co-Authored-By authors to all ChangeLog entries + for author in self.co_authors: + if author not in authors: + authors.append(author) + + for i, author in enumerate(authors): + if i == 0: + output += '%s %s\n' % (timestamp, author) + else: + output += '\t %s\n' % author + output += '\n' + for pr in entry.prs: + output += '\t%s\n' % pr + for line in entry.lines: + output += line + '\n' + yield (entry.folder, output.rstrip()) + + def print_output(self): + for entry, output in self.to_changelog_entries(): + print('@@CL %s' % entry) + print(output) + print('@@CL') + + def print_errors(self): + print('Errors:') + for error in self.errors: + print(error) |