diff options
author | Henrik G. Olsson <hnrklssn@gmail.com> | 2023-11-13 14:45:27 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-13 14:45:27 +0100 |
commit | e6eda66cbc5ebf424b184506fc6bb27bee3d293f (patch) | |
tree | 279686a6bb14a0023b9d8680d29ad2e5a2206c25 /llvm/utils/UpdateTestChecks/common.py | |
parent | 81330286f18fda3da2a69fb2dd12de0ac90115b7 (diff) | |
download | llvm-e6eda66cbc5ebf424b184506fc6bb27bee3d293f.zip llvm-e6eda66cbc5ebf424b184506fc6bb27bee3d293f.tar.gz llvm-e6eda66cbc5ebf424b184506fc6bb27bee3d293f.tar.bz2 |
Recommit changes to global checks (#71171)
Recommits the changes from https://reviews.llvm.org/D148216.
Explicitly named globals are now matched literally; a capture group is no
longer emitted for the name. This resolves #70047.
Metadata and annotations, on the other hand, are captured and matched
against by default, since their identifiers are not stable.
The issues that caused the revert (#63746) have been fixed:
The first issue, that of duplicated checkers, has already been resolved
in #70050.
This PR resolves the second issue listed in #63746, regarding the order
of named and unnamed globals. This is fixed by recording the index of
substrings containing global values, and sorting the checks according to
that index before emitting them. This results in global value checks
being emitted in the order they were seen instead of being grouped
separately.
Diffstat (limited to 'llvm/utils/UpdateTestChecks/common.py')
-rw-r--r-- | llvm/utils/UpdateTestChecks/common.py | 218 |
1 files changed, 197 insertions, 21 deletions
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 88b2ccc2..d1fd884 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -26,8 +26,10 @@ Version changelog: type/attributes. 3: Opening parenthesis of function args is kept on the first LABEL line in case arguments are split to a separate SAME line. +4: --check-globals now has a third option ('smart'). The others are now called + 'none' and 'all'. 'smart' is the default. """ -DEFAULT_VERSION = 3 +DEFAULT_VERSION = 4 class Regex(object): @@ -228,6 +230,8 @@ def parse_args(parser, argv): _verbose = args.verbose _global_value_regex = args.global_value_regex _global_hex_value_regex = args.global_hex_value_regex + if "check_globals" in args and args.check_globals == "default": + args.check_globals = "none" if args.version < 4 else "smart" return args @@ -332,8 +336,8 @@ def itertests( input_lines = [l.rstrip() for l in f] first_line = input_lines[0] if input_lines else "" if UTC_AVOID in first_line: - warn("Skipping test that must not be autogenerated: " + test) - continue + warn("Skipping test that must not be autogenerated: " + test) + continue is_regenerate = UTC_ADVERT in first_line # If we're generating a new test, set the default version to the latest. @@ -896,7 +900,9 @@ class NamelessValue: *, is_before_functions=False, is_number=False, - replace_number_with_counter=False + replace_number_with_counter=False, + match_literally=False, + interlaced_with_previous=False ): self.check_prefix = check_prefix self.check_key = check_key @@ -908,6 +914,8 @@ class NamelessValue: # Some variable numbers (e.g. MCINST1234) will change based on unrelated # modifications to LLVM, replace those with an incrementing counter. 
self.replace_number_with_counter = replace_number_with_counter + self.match_literally = match_literally + self.interlaced_with_previous = interlaced_with_previous self.variable_mapping = {} # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'. @@ -919,9 +927,10 @@ class NamelessValue: return self.global_ir_rhs_regexp is not None # Return the IR prefix and check prefix we use for this kind or IR value, - # e.g., (%, TMP) for locals. + # e.g., (%, TMP) for locals. If the IR prefix is a regex, return the prefix + # used in the IR output def get_ir_prefix_from_ir_value_match(self, match): - return self.ir_prefix, self.check_prefix + return re.search(self.ir_prefix, match[0])[0], self.check_prefix # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals def get_ir_regex_from_ir_value_re_match(self, match): @@ -990,8 +999,16 @@ ir_nameless_values = [ NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None), NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"), NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None), + NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True), NamelessValue( - r"GLOB", "@", r"@", r'[a-zA-Z0-9_$"\\.-]+', r".+", is_before_functions=True + r"GLOBNAMED", + "@", + r"@", + r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*", + r".+", + is_before_functions=True, + match_literally=True, + interlaced_with_previous=True, ), NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None), NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None), @@ -1003,6 +1020,19 @@ ir_nameless_values = [ NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None), NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"), NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None), + NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None), +] + +global_nameless_values = [ + nameless_value + for nameless_value in ir_nameless_values + if 
nameless_value.global_ir_rhs_regexp is not None +] +# global variable names should be matched literally +global_nameless_values_w_unstable_ids = [ + nameless_value + for nameless_value in global_nameless_values + if not nameless_value.match_literally ] asm_nameless_values = [ @@ -1037,6 +1067,7 @@ analyze_nameless_values = [ ), ] + def createOrRegexp(old, new): if not old: return new @@ -1060,7 +1091,7 @@ for nameless_value in ir_nameless_values: if nameless_value.global_ir_rhs_regexp is not None: match = "^" + match IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match) -IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)]|\Z)" +IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)\}]|\Z)" IR_VALUE_RE = re.compile( IR_VALUE_REGEXP_PREFIX + r"(" @@ -1069,6 +1100,18 @@ IR_VALUE_RE = re.compile( + IR_VALUE_REGEXP_SUFFIX ) +GLOBAL_VALUE_REGEXP_STRING = r"" +for nameless_value in global_nameless_values_w_unstable_ids: + match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp) + GLOBAL_VALUE_REGEXP_STRING = createOrRegexp(GLOBAL_VALUE_REGEXP_STRING, match) +GLOBAL_VALUE_RE = re.compile( + IR_VALUE_REGEXP_PREFIX + + r"(" + + GLOBAL_VALUE_REGEXP_STRING + + r")" + + IR_VALUE_REGEXP_SUFFIX +) + # Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments). ASM_VALUE_REGEXP_STRING = "" for nameless_value in asm_nameless_values: @@ -1101,6 +1144,7 @@ first_nameless_group_in_ir_value_match = 3 variable_group_in_ir_value_match = 3 attribute_group_in_ir_value_match = 4 + # Check a match for IR_VALUE_RE and inspect it to determine if it was a local # value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above. 
def get_idx_from_ir_value_match(match): @@ -1226,6 +1270,20 @@ def generalize_check_lines( ) +def generalize_global_check_line(line, preserve_names, global_vars_seen): + [new_line] = generalize_check_lines_common( + [line], + False, + set(), + global_vars_seen, + global_nameless_values_w_unstable_ids, + GLOBAL_VALUE_RE, + False, + preserve_names, + ) + return new_line + + def generalize_asm_check_lines(lines, vars_seen, global_vars_seen): return generalize_check_lines_common( lines, @@ -1251,6 +1309,7 @@ def generalize_analyze_check_lines(lines, vars_seen, global_vars_seen): False, ) + def add_checks( output_lines, comment_marker, @@ -1553,7 +1612,7 @@ def add_analyze_checks( def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes): - for nameless_value in itertools.chain(ir_nameless_values, asm_nameless_values): + for nameless_value in itertools.chain(global_nameless_values, asm_nameless_values): if nameless_value.global_ir_rhs_regexp is None: continue @@ -1564,7 +1623,10 @@ def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes): global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M)) lines = [] for m in global_ir_value_re.finditer(raw_tool_output): - lines.append(m.group(0)) + # Attach the substring's start index so that CHECK lines + # can be sorted properly even if they are matched by different nameless values. + # This is relevant for GLOB and GLOBNAMED since they may appear interlaced. 
+ lines.append((m.start(), m.group(0))) for prefix in prefixes: if glob_val_dict[prefix] is None: @@ -1580,6 +1642,86 @@ def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes): glob_val_dict[prefix][nameless_value.check_prefix] = lines +def filter_globals_according_to_preference( + global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting +): + if global_check_setting == "none": + return [] + if global_check_setting == "all": + return global_val_lines_w_index + assert global_check_setting == "smart" + + if nameless_value.check_key == "#": + # attribute sets are usually better checked by --check-attributes + return [] + + def extract(line, nv): + p = ( + "^" + + nv.ir_prefix + + "(" + + nv.ir_regexp + + ") = (" + + nv.global_ir_rhs_regexp + + ")" + ) + match = re.match(p, line) + return (match.group(1), re.findall(nv.ir_regexp, match.group(2))) + + transitively_visible = set() + contains_refs_to = {} + + def add(var): + nonlocal transitively_visible + nonlocal contains_refs_to + if var in transitively_visible: + return + transitively_visible.add(var) + if not var in contains_refs_to: + return + for x in contains_refs_to[var]: + add(x) + + for i, line in global_val_lines_w_index: + (var, refs) = extract(line, nameless_value) + contains_refs_to[var] = refs + for var, check_key in global_vars_seen: + if check_key != nameless_value.check_key: + continue + add(var) + return [ + (i, line) + for i, line in global_val_lines_w_index + if extract(line, nameless_value)[0] in transitively_visible + ] + + +METADATA_FILTERS = [ + ( + r"(?<=\")(\w+ )?(\w+ version )[\d.]+(?: \([^)]+\))?", + r"{{.*}}\2{{.*}}", + ), # preface with glob also, to capture optional CLANG_VENDOR + (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"), +] +METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS] + + +def filter_unstable_metadata(line): + for f, replacement in METADATA_FILTERS_RE: + line = f.sub(replacement, line) + return 
line + + +def flush_current_checks(output_lines, new_lines_w_index, comment_marker): + if not new_lines_w_index: + return + output_lines.append(comment_marker + SEPARATOR) + new_lines_w_index.sort() + for _, line in new_lines_w_index: + output_lines.append(line) + new_lines_w_index.clear() + + def add_global_checks( glob_val_dict, comment_marker, @@ -1588,11 +1730,11 @@ def add_global_checks( global_vars_seen_dict, preserve_names, is_before_functions, + global_check_setting, ): printed_prefixes = set() - for nameless_value in ir_nameless_values: - if nameless_value.global_ir_rhs_regexp is None: - continue + output_lines_loc = {} # Allows GLOB and GLOBNAMED to be sorted correctly + for nameless_value in global_nameless_values: if nameless_value.is_before_functions != is_before_functions: continue for p in prefix_list: @@ -1616,26 +1758,41 @@ def add_global_checks( check_lines = [] global_vars_seen_before = [key for key in global_vars_seen.keys()] - for line in glob_val_dict[checkprefix][nameless_value.check_prefix]: + lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix] + lines_w_index = filter_globals_according_to_preference( + lines_w_index, + global_vars_seen_before, + nameless_value, + global_check_setting, + ) + for i, line in lines_w_index: if _global_value_regex: matched = False for regex in _global_value_regex: - if re.match("^@" + regex + " = ", line): + if re.match("^@" + regex + " = ", line) or re.match( + "^!" 
+ regex + " = ", line + ): matched = True break if not matched: continue - tmp = generalize_check_lines( - [line], False, set(), global_vars_seen, preserve_names + new_line = generalize_global_check_line( + line, preserve_names, global_vars_seen ) - check_line = "%s %s: %s" % (comment_marker, checkprefix, tmp[0]) - check_lines.append(check_line) + new_line = filter_unstable_metadata(new_line) + check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line) + check_lines.append((i, check_line)) if not check_lines: continue - output_lines.append(comment_marker + SEPARATOR) + if not checkprefix in output_lines_loc: + output_lines_loc[checkprefix] = [] + if not nameless_value.interlaced_with_previous: + flush_current_checks( + output_lines, output_lines_loc[checkprefix], comment_marker + ) for check_line in check_lines: - output_lines.append(check_line) + output_lines_loc[checkprefix].append(check_line) printed_prefixes.add((checkprefix, nameless_value.check_prefix)) @@ -1646,6 +1803,16 @@ def add_global_checks( break if printed_prefixes: + for p in prefix_list: + if p[0] is None: + continue + for checkprefix in p[0]: + if checkprefix not in output_lines_loc: + continue + flush_current_checks( + output_lines, output_lines_loc[checkprefix], comment_marker + ) + break output_lines.append(comment_marker + SEPARATOR) return printed_prefixes @@ -1712,6 +1879,15 @@ def get_autogennote_suffix(parser, args): ): continue value = getattr(args, action.dest) + if action.dest == "check_globals": + default_value = "none" if args.version < 4 else "smart" + if value == default_value: + continue + autogenerated_note_args += action.option_strings[0] + " " + if args.version < 4 and value == "all": + continue + autogenerated_note_args += "%s " % value + continue if action.const is not None: # action stores a constant (usually True/False) # Skip actions with different constant values (this happens with boolean # --foo/--no-foo options) |