aboutsummaryrefslogtreecommitdiff
path: root/contrib/gcc-changelog/git_commit.py
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/gcc-changelog/git_commit.py')
-rwxr-xr-xcontrib/gcc-changelog/git_commit.py677
1 files changed, 677 insertions, 0 deletions
diff --git a/contrib/gcc-changelog/git_commit.py b/contrib/gcc-changelog/git_commit.py
new file mode 100755
index 0000000..5a9cc4c
--- /dev/null
+++ b/contrib/gcc-changelog/git_commit.py
@@ -0,0 +1,677 @@
+#!/usr/bin/env python3
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+import os
+import re
+
# Directories of the source tree that carry their own ChangeLog file.
changelog_locations = {
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib',
}

# Component names accepted in front of the slash of a "PR component/NNNN"
# line.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web',
}

# Changed files below these prefixes do not have to be mentioned in a
# ChangeLog entry.
ignored_prefixes = [
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
]

# The only "<prefix>*" wildcard entries that are accepted.
wildcard_prefixes = [
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
]

# Non-ChangeLog files that are treated like ChangeLog files: they must be
# updated separately from normal commits.
misc_files = [
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE',
]

# "YYYY-MM-DD  Name <email>": a date, exactly two spaces, then the author.
author_line_regex = re.compile(
    r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.* <.*>)')
# A tab, optional spaces, then "Name <email>".
additional_author_regex = re.compile(
    r'^\t(?P<spaces>\ *)?(?P<name>.* <.*>)')
# "[for ]path/ChangeLog[:]" header; group 1 is the directory part.
# NOTE(review): '+-/' inside the character class is a range, so it also
# admits ',' and '.' -- confirm that this is intended.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "<TAB>PR component/NNNN"; the component (with its slash) is optional here.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?([0-9]+)$')
# "<TAB>DR NNNN".
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# "<TAB>*<spaces><content>": the first line of a change description.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that ends the list of file names of a change description.
end_of_location_regex = re.compile(r'[\[<(:]')

# Maximum width of a ChangeLog line, a tab counting as TAB_WIDTH columns.
LINE_LIMIT = 100
TAB_WIDTH = 8

# Prefixes of specially handled commit-message lines.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
CHERRY_PICK_PREFIX = '(cherry picked from commit '
REVERT_PREFIX = 'This reverts commit '

# Lower-case prefixes of lines that are skipped while parsing.
REVIEW_PREFIXES = (
    'reviewed-by: ',
    'reviewed-on: ',
    'signed-off-by: ',
    'acked-by: ',
    'tested-by: ',
    'reported-by: ',
    'suggested-by: ',
)

# strftime() format of the dates written into ChangeLog entries.
DATE_FORMAT = '%Y-%m-%d'
+
+
class Error:
    """A validation problem, optionally tied to the text that caused it."""

    def __init__(self, message, line=None):
        self.line = line
        self.message = message

    def __repr__(self):
        # The offending text is appended in double quotes only when it is
        # non-empty.
        if not self.line:
            return self.message
        return self.message + ':"%s"' % self.line
+
+
class ChangeLogEntry:
    """One ChangeLog entry: a target folder plus its authors, PRs and text.

    author_lines holds (name, date-or-None) tuples.  files and file_patterns
    stay empty until parse_file_names() is called.
    """

    def __init__(self, folder, authors, prs):
        self.folder = folder
        # Copy the sequences so that entries never share the caller's lists
        # ('list.copy()' is not available before Python 3.3).
        self.author_lines = list(authors)
        self.initial_prs = list(prs)
        self.prs = list(prs)
        self.lines = []
        self.files = []
        self.file_patterns = []

    def parse_file_names(self):
        """Fill self.files and self.file_patterns from self.lines."""
        # True while the text being processed lies between a star prefix and
        # the end of the file list (a colon, an open paren, '[' or '<').
        inside_file_list = False

        for text in self.lines:
            # A star prefix starts a new file list right after the star.
            star = star_prefix_regex.match(text)
            if star:
                inside_file_list = True
                text = star.group('content')

            if not inside_file_list:
                continue

            # Cut away everything that is not a file name: entities
            # "(NAME)", cases "<PATTERN>", conditions "[COND]" and the entry
            # text (the colon, if present, and anything that follows it).
            terminator = end_of_location_regex.search(text)
            if terminator:
                text = text[:terminator.start()]
                inside_file_list = False

            # What remains is a list of file names separated by commas and
            # whitespace; a trailing '*' marks a wildcard prefix.
            for chunk in text.split(','):
                name = chunk.strip()
                if not name:
                    continue
                if name.endswith('*'):
                    self.file_patterns.append(name[:-1])
                else:
                    self.files.append(name)

    @property
    def datetime(self):
        """First non-empty date among the author lines, or None."""
        return next((stamp for _, stamp in self.author_lines if stamp), None)

    @property
    def authors(self):
        """Author names of all author lines, as a new list."""
        names = []
        for name, _ in self.author_lines:
            names.append(name)
        return names

    @property
    def is_empty(self):
        """True if no text line and no PR beyond the initial ones was added."""
        if self.lines:
            return False
        return self.prs == self.initial_prs

    def contains_author(self, author):
        """Tell whether an author line with exactly this name exists."""
        return any(name == author for name, _ in self.author_lines)
+
+
class GitInfo:
    """Plain record describing one commit.

    lines are the lines of the commit message; modified_files is a list of
    (path, status) tuples with status 'A', 'D' or 'M'.
    """

    def __init__(self, hexsha, date, author, lines, modified_files):
        # Identification of the commit.
        self.hexsha, self.date, self.author = hexsha, date, author
        # Content of the commit.
        self.lines, self.modified_files = lines, modified_files
+
+
class GitCommit:
    """Parse the ChangeLog part of a commit message and validate it.

    All work happens in the constructor.  Problems are collected as Error
    objects in self.errors, and the parsed entries end up in
    self.changelog_entries.
    """

    def __init__(self, info, strict=True, commit_to_info_hook=None):
        """Parse and check the commit described by info.

        info -- object providing hexsha, date, author, lines and
                modified_files (see GitInfo).
        strict -- when true, a commit that mixes ChangeLog/DATESTAMP/
                  BASE-VER/DEV-PHASE updates with other files is an error.
        commit_to_info_hook -- callable mapping a commit id to such an info
                  object (or a false value when the commit is unknown).  It
                  is used for reverted and cherry-picked commits.
        """
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            if line.startswith(REVERT_PREFIX):
                self.revert_commit = line[len(REVERT_PREFIX):].rstrip('.')
                break
        if self.revert_commit:
            # A revert is checked against the commit it reverts.  Report an
            # error instead of crashing when that commit cannot be resolved
            # (no hook was given or the hook does not know the commit).
            hook = self.commit_to_info_hook
            reverted_info = hook(self.revert_commit) if hook else None
            if not reverted_info:
                self.errors.append(Error('Cannot find to-be-reverted commit',
                                         self.revert_commit))
                return
            self.info = reverted_info

        project_files = [f for f in self.info.modified_files
                         if self.is_changelog_filename(f[0])
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            return
        elif project_files and strict:
            self.errors.append(Error('ChangeLog, DATESTAMP, BASE-VER and '
                                     'DEV-PHASE updates should be done '
                                     'separately from normal commits'))
            return

        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()

    @property
    def success(self):
        """True when no error was recorded."""
        return not self.errors

    @property
    def new_files(self):
        """Paths of the files with status 'A' (added)."""
        return [x[0] for x in self.info.modified_files if x[1] == 'A']

    @classmethod
    def is_changelog_filename(cls, path):
        """Tell whether path names a file called exactly 'ChangeLog'."""
        return path.endswith('/ChangeLog') or path == 'ChangeLog'

    @classmethod
    def find_changelog_location(cls, name):
        """Return name as a ChangeLog location, or None.

        One leading tab, one trailing ':' and one trailing '/' are removed
        (in that order) before the lookup in changelog_locations.
        """
        if name.startswith('\t'):
            name = name[1:]
        if name.endswith(':'):
            name = name[:-1]
        if name.endswith('/'):
            name = name[:-1]
        return name if name in changelog_locations else None

    @classmethod
    def format_git_author(cls, author):
        """Put an extra space in front of every '<' of author."""
        assert '<' in author
        return author.replace('<', ' <')

    @classmethod
    def parse_git_name_status(cls, string):
        """Turn tab-separated "status<TAB>path" lines into (path, status).

        Only statuses 'A', 'D', 'M' and those starting with 'R' are
        recognised.  A rename becomes a deletion of the old path followed by
        an addition of the new one.  Other lines are skipped.
        """
        modified_files = []
        for entry in string.split('\n'):
            parts = entry.split('\t')
            t = parts[0]
            if t == 'A' or t == 'D' or t == 'M':
                modified_files.append((parts[1], t))
            elif t.startswith('R'):
                modified_files.append((parts[1], 'D'))
                modified_files.append((parts[2], 'A'))
        return modified_files

    def parse_lines(self, all_are_ignored):
        """Set self.changes to the message tail holding the ChangeLog.

        The tail starts at the first non-empty line that looks like a
        ChangeLog header, a known location, a change description, a PR/DR
        line or an author line.  If there is no such line, an error is
        recorded unless all_are_ignored is true.
        """
        body = self.info.lines

        for i, b in enumerate(body):
            if not b:
                continue
            if (changelog_regex.match(b) or self.find_changelog_location(b)
                    or star_prefix_regex.match(b) or pr_regex.match(b)
                    or dr_regex.match(b) or author_line_regex.match(b)):
                self.changes = body[i:]
                return
        if not all_are_ignored:
            self.errors.append(Error('cannot find a ChangeLog location in '
                                     'message'))

    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Authors and PRs seen before the first entry become top-level ones
        and are copied into every entry created afterwards.  Formatting
        problems are recorded in self.errors.
        """
        last_entry = None
        will_deduce = False
        for line in self.changes:
            if not line:
                # An empty line ends an entry whose location still has to be
                # deduced; the next text line then starts a new entry.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                self.errors.append(Error('line exceeds %d character limit'
                                         % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    component = pr_regex.match(line).group('component')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                # Trailers are matched case-insensitively, the cherry-pick
                # marker is not.
                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                elif line.startswith(CHERRY_PICK_PREFIX):
                    commit = line[len(CHERRY_PICK_PREFIX):].rstrip(')')
                    self.cherry_pick_commit = commit
                    continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        if len(m.group('spaces')) != 1:
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)

    def parse_file_names(self):
        """Extract the mentioned file names of every entry."""
        for entry in self.changelog_entries:
            entry.parse_file_names()

    def check_file_patterns(self):
        """Record an error for every wildcard outside wildcard_prefixes."""
        for entry in self.changelog_entries:
            if entry.folder is None:
                # The location could not be deduced.  That has been reported
                # already and there is no folder to join the pattern with.
                continue
            for pattern in entry.file_patterns:
                name = os.path.join(entry.folder, pattern)
                if name not in wildcard_prefixes:
                    msg = 'unsupported wildcard prefix'
                    self.errors.append(Error(msg, name))

    def check_for_empty_description(self):
        """Record an error for "* file:" lines that have no description.

        That is a star-prefixed line ending with ':' which is the last line
        of its entry or is directly followed by another star-prefixed line.
        """
        for entry in self.changelog_entries:
            for i, line in enumerate(entry.lines):
                if (star_prefix_regex.match(line) and line.endswith(':') and
                    (i == len(entry.lines) - 1
                     or star_prefix_regex.match(entry.lines[i + 1]))):
                    msg = 'missing description of a change'
                    self.errors.append(Error(msg, line))

    def get_file_changelog_location(self, changelog_file):
        """Return the directory prefix under which changelog_file was changed.

        The modified files are scanned in order.  '' is returned when a
        modified path equals changelog_file, the part in front of
        '/' + changelog_file when a modified path contains it, and None when
        nothing matches.
        """
        # NOTE(review): find() matches '/' + changelog_file anywhere in the
        # path, not only at its end -- confirm that this is intended.
        for file in self.info.modified_files:
            if file[0] == changelog_file:
                # root ChangeLog file
                return ''
            index = file[0].find('/' + changelog_file)
            if index != -1:
                return file[0][:index]
        return None

    def deduce_changelog_locations(self):
        """Derive the folder of entries that were written without one.

        The folder is taken from the locations of the mentioned files.  An
        error is recorded when no usable location is found, and processing
        stops with an error when the files disagree.
        """
        for entry in self.changelog_entries:
            if not entry.folder:
                changelog = None
                for file in entry.files:
                    location = self.get_file_changelog_location(file)
                    if (location == ''
                            or (location
                                and location in changelog_locations)):
                        if changelog and changelog != location:
                            msg = 'could not deduce ChangeLog file, ' \
                                  'not unique location'
                            self.errors.append(Error(msg))
                            return
                        changelog = location
                if changelog is not None:
                    entry.folder = changelog
                else:
                    msg = 'could not deduce ChangeLog file'
                    self.errors.append(Error(msg))

    @classmethod
    def in_ignored_location(cls, path):
        """Tell whether path starts with one of ignored_prefixes."""
        for ignored in ignored_prefixes:
            if path.startswith(ignored):
                return True
        return False

    @classmethod
    def get_changelog_by_path(cls, path):
        """Return the longest leading part of path in changelog_locations.

        '' is returned when no leading part is a known location.
        """
        components = path.split('/')
        while components:
            if '/'.join(components) in changelog_locations:
                break
            components = components[:-1]
        return '/'.join(components)

    def check_mentioned_files(self):
        """Cross-check the mentioned files against the changed files.

        Errors are recorded for mentioned-but-unchanged files, for changed
        files that are neither mentioned nor covered by a wildcard, and for
        wildcards that cover nothing.  Added files that are not mentioned
        get a "New file." line generated in the matching entry; that entry
        is created when it does not exist yet.
        """
        # Every entry must have a location by now: the caller only gets here
        # when location deduction recorded no error.
        assert all(x.folder is not None for x in self.changelog_entries)

        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    mentioned_files.add(os.path.join(entry.folder, file))
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        changed_files = set(x[0] for x in self.info.modified_files
                            if not self.is_changelog_filename(x[0]))
        # The property builds a new list on every access, so evaluate it
        # only once.
        new_files = set(self.new_files)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            self.errors.append(Error(msg, file))
        for file in sorted(changed_files - mentioned_files):
            if self.in_ignored_location(file):
                continue
            if file in new_files:
                changelog_location = self.get_changelog_by_path(file)
                entry = next((x for x in self.changelog_entries
                              if x.folder == changelog_location), None)
                if entry is None:
                    prs = self.top_level_prs
                    if not prs and self.changelog_entries:
                        # if all ChangeLog entries have identical PRs
                        # then use them
                        prs = self.changelog_entries[0].prs
                        for other in self.changelog_entries:
                            if other.prs != prs:
                                prs = []
                                break
                    entry = ChangeLogEntry(changelog_location,
                                           self.top_level_authors,
                                           prs)
                    self.changelog_entries.append(entry)
                # strip prefix of the file
                assert file.startswith(entry.folder)
                file = file[len(entry.folder):].lstrip('/')
                entry.lines.append('\t* %s: New file.' % file)
                entry.files.append(file)
            else:
                used_pattern = next((p for p in mentioned_patterns
                                     if file.startswith(p)), None)
                if used_pattern:
                    used_patterns.add(used_pattern)
                else:
                    msg = 'changed file not mentioned in a ChangeLog'
                    self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))

    def check_for_correct_changelog(self):
        """Record an error for files listed under the wrong ChangeLog."""
        for entry in self.changelog_entries:
            for file in entry.files:
                full_path = os.path.join(entry.folder, file)
                changelog_location = self.get_changelog_by_path(full_path)
                if changelog_location != entry.folder:
                    msg = 'wrong ChangeLog location "%s", should be "%s"'
                    err = Error(msg % (entry.folder, changelog_location), file)
                    self.errors.append(err)

    @classmethod
    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
        """Format the author header of a ChangeLog entry.

        The spacing is the one the parser of this file requires: two spaces
        between the date and the first author, and one tab plus four spaces
        in front of every additional author.  An empty line ends the header.
        """
        output = ''
        for i, author in enumerate(authors):
            if i == 0:
                output += '%s%s  %s\n' % (prefix, timestamp, author)
            else:
                output += '%s\t    %s\n' % (prefix, author)
        output += '\n'
        return output

    def to_changelog_entries(self, use_commit_ts=False):
        """Yield (folder, text) for every ChangeLog entry of the commit.

        Reverts and cherry-picks get an outer header with the author and
        date of the commit itself, followed by "Revert:" or "Backported
        from master:" and the header of the original change.
        use_commit_ts forces the commit date for ordinary commits even if
        the entry carries its own date.
        """
        commit_timestamp = self.info.date.strftime(DATE_FORMAT)
        for entry in self.changelog_entries:
            output = ''
            timestamp = entry.datetime
            # Date of the outer header of a revert/backport.  It is computed
            # per entry so that no state leaks from one entry to the next.
            header_timestamp = commit_timestamp
            if self.revert_commit:
                # self.info describes the reverted commit and
                # self.original_info the revert itself.
                timestamp = commit_timestamp
                orig_date = self.original_info.date
                header_timestamp = orig_date.strftime(DATE_FORMAT)
            elif self.cherry_pick_commit:
                hook = self.commit_to_info_hook
                info = hook(self.cherry_pick_commit) if hook else None
                # it can happen that it is a cherry-pick for a different
                # repository
                if info:
                    timestamp = info.date.strftime(DATE_FORMAT)
                else:
                    timestamp = commit_timestamp
            elif not timestamp or use_commit_ts:
                timestamp = commit_timestamp
            authors = entry.authors if entry.authors else [self.info.author]
            # add Co-Authored-By authors to all ChangeLog entries
            for author in self.co_authors:
                if author not in authors:
                    authors.append(author)

            if self.cherry_pick_commit or self.revert_commit:
                original_author = self.original_info.author
                output += self.format_authors_in_changelog([original_author],
                                                           header_timestamp)
                if self.revert_commit:
                    output += '\tRevert:\n'
                else:
                    output += '\tBackported from master:\n'
                output += self.format_authors_in_changelog(authors,
                                                           timestamp, '\t')
            else:
                output += self.format_authors_in_changelog(authors, timestamp)
            for pr in entry.prs:
                output += '\t%s\n' % pr
            for line in entry.lines:
                output += line + '\n'
            yield (entry.folder, output.rstrip())

    def print_output(self):
        """Print every generated ChangeLog entry below a folder banner."""
        for entry, output in self.to_changelog_entries():
            print('------ %s/ChangeLog ------ ' % entry)
            print(output)

    def print_errors(self):
        """Print the recorded errors, one per line."""
        print('Errors:')
        for error in self.errors:
            print(error)