diff options
author | Henrik G. Olsson <hnrklssn@gmail.com> | 2023-04-14 16:28:34 +0200 |
---|---|---|
committer | Henrik G. Olsson <hnrklssn@gmail.com> | 2023-07-05 14:04:50 +0200 |
commit | 8a3fdf7b908978625e9a7e57fbb443e4e6f98976 (patch) | |
tree | dcb0918e7837c2b59f28b21a44e836e0a4443502 /llvm/utils/UpdateTestChecks/common.py | |
parent | c6a4fc8ddfaf68d932b69b80a1efd0742fa8211a (diff) | |
download | llvm-8a3fdf7b908978625e9a7e57fbb443e4e6f98976.zip llvm-8a3fdf7b908978625e9a7e57fbb443e4e6f98976.tar.gz llvm-8a3fdf7b908978625e9a7e57fbb443e4e6f98976.tar.bz2 |
[UTC] Add fallback support for specific metadata, and check their defs
This prevents update_cc_tests.py from emitting hard-coded identifiers
for metadata (global variable checkers still check hard-coded
identifiers). Instead it emits regex checkers that match even if the
identifiers change. Also adds a new mode for --check-globals: instead of
simply being on or off, it now has the options 'none', 'smart' and
'all', with 'none' and 'all' corresponding to the previous modes.
The 'smart' mode only emits checks for global definitions referenced
in the IR or other metadata that itself has a definition checker
emitted, making the rule transitive. It does not emit checks for
attribute sets, since those are better checked by --check-attributes. This
mode is made the new default. To make the change in default mode
backwards compatible, a version bump is introduced (to v3), and the
default remains 'none' in v1 & v2.
This will result in metadata checks being emitted more often, so filters
are added so that absolute file paths and compiler version git hashes
are not checked.
rdar://105239218
Diffstat (limited to 'llvm/utils/UpdateTestChecks/common.py')
-rw-r--r-- | llvm/utils/UpdateTestChecks/common.py | 166 |
1 files changed, 151 insertions, 15 deletions
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index b22d7e3..b90e4f6 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -24,8 +24,10 @@ Version changelog: 1: Initial version, used by tests that don't specify --version explicitly. 2: --function-signature is now enabled by default and also checks return type/attributes. +3: --check-globals now has a third option ('smart'). The others are now called + 'none' and 'all'. 'smart' is the default. """ -DEFAULT_VERSION = 2 +DEFAULT_VERSION = 3 class Regex(object): @@ -220,6 +222,8 @@ def parse_args(parser, argv): args = parser.parse_args(argv) if args.version >= 2: args.function_signature = True + if "check_globals" in args and args.check_globals == "default": + args.check_globals = "none" if args.version < 3 else "smart" return args @@ -877,7 +881,8 @@ class NamelessValue: *, is_before_functions=False, is_number=False, - replace_number_with_counter=False + replace_number_with_counter=False, + match_literally=False, ): self.check_prefix = check_prefix self.check_key = check_key @@ -889,6 +894,7 @@ class NamelessValue: # Some variable numbers (e.g. MCINST1234) will change based on unrelated # modifications to LLVM, replace those with an incrementing counter. self.replace_number_with_counter = replace_number_with_counter + self.match_literally = match_literally self.variable_mapping = {} # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'. @@ -900,9 +906,10 @@ class NamelessValue: return self.global_ir_rhs_regexp is not None # Return the IR prefix and check prefix we use for this kind or IR value, - # e.g., (%, TMP) for locals. + # e.g., (%, TMP) for locals. 
If the IR prefix is a regex, return the prefix + # used in the IR output def get_ir_prefix_from_ir_value_match(self, match): - return self.ir_prefix, self.check_prefix + return re.search(self.ir_prefix, match[0])[0], self.check_prefix # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals def get_ir_regex_from_ir_value_re_match(self, match): @@ -971,8 +978,15 @@ ir_nameless_values = [ NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None), NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"), NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None), + NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True), NamelessValue( - r"GLOB", "@", r"@", r'[a-zA-Z0-9_$"\\.-]+', r".+", is_before_functions=True + r"GLOBNAMED", + "@", + r"@", + r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*", + r".+", + is_before_functions=True, + match_literally=True, ), NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None), NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None), @@ -984,6 +998,19 @@ ir_nameless_values = [ NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None), NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"), NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None), + NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None), +] + +global_nameless_values = [ + nameless_value + for nameless_value in ir_nameless_values + if nameless_value.global_ir_rhs_regexp is not None +] +# global variable names should be matched literally +global_nameless_values_w_unstable_ids = [ + nameless_value + for nameless_value in global_nameless_values + if not nameless_value.match_literally ] asm_nameless_values = [ @@ -1031,7 +1058,7 @@ for nameless_value in ir_nameless_values: if nameless_value.global_ir_rhs_regexp is not None: match = "^" + match IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match) -IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)]|\Z)" 
+IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)\}]|\Z)" IR_VALUE_RE = re.compile( IR_VALUE_REGEXP_PREFIX + r"(" @@ -1040,6 +1067,18 @@ IR_VALUE_RE = re.compile( + IR_VALUE_REGEXP_SUFFIX ) +GLOBAL_VALUE_REGEXP_STRING = r"" +for nameless_value in global_nameless_values_w_unstable_ids: + match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp) + GLOBAL_VALUE_REGEXP_STRING = createOrRegexp(GLOBAL_VALUE_REGEXP_STRING, match) +GLOBAL_VALUE_RE = re.compile( + IR_VALUE_REGEXP_PREFIX + + r"(" + + GLOBAL_VALUE_REGEXP_STRING + + r")" + + IR_VALUE_REGEXP_SUFFIX +) + # Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments). ASM_VALUE_REGEXP_STRING = "" for nameless_value in asm_nameless_values: @@ -1058,6 +1097,7 @@ first_nameless_group_in_ir_value_match = 3 variable_group_in_ir_value_match = 3 attribute_group_in_ir_value_match = 4 + # Check a match for IR_VALUE_RE and inspect it to determine if it was a local # value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above. 
def get_idx_from_ir_value_match(match): @@ -1172,6 +1212,19 @@ def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen): ) +def generalize_global_check_line(line, is_analyze, global_vars_seen): + [new_line] = generalize_check_lines_common( + [line], + is_analyze, + set(), + global_vars_seen, + global_nameless_values_w_unstable_ids, + GLOBAL_VALUE_RE, + False, + ) + return new_line + + def generalize_asm_check_lines(lines, vars_seen, global_vars_seen): return generalize_check_lines_common( lines, @@ -1437,7 +1490,7 @@ def add_analyze_checks( def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes): - for nameless_value in itertools.chain(ir_nameless_values, asm_nameless_values): + for nameless_value in itertools.chain(global_nameless_values, asm_nameless_values): if nameless_value.global_ir_rhs_regexp is None: continue @@ -1464,6 +1517,74 @@ def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes): glob_val_dict[prefix][nameless_value.check_prefix] = lines +def filter_globals_according_to_preference( + global_val_lines, global_vars_seen, nameless_value, global_check_setting +): + if global_check_setting == "none": + return [] + if global_check_setting == "all": + return global_val_lines + assert global_check_setting == "smart" + + if nameless_value.check_key == "#": + # attribute sets are usually better checked by --check-attributes + return [] + + def extract(line, nv): + p = ( + "^" + + nv.ir_prefix + + "(" + + nv.ir_regexp + + ") = (" + + nv.global_ir_rhs_regexp + + ")" + ) + match = re.match(p, line) + return (match.group(1), re.findall(nv.ir_regexp, match.group(2))) + + transitively_visible = set() + contains_refs_to = {} + + def add(var): + nonlocal transitively_visible + nonlocal contains_refs_to + if var in transitively_visible: + return + transitively_visible.add(var) + if not var in contains_refs_to: + return + for x in contains_refs_to[var]: + add(x) + + for line in global_val_lines: + (var, refs) 
= extract(line, nameless_value) + contains_refs_to[var] = refs + for var, check_key in global_vars_seen: + if check_key != nameless_value.check_key: + continue + add(var) + return [ + line + for line in global_val_lines + if extract(line, nameless_value)[0] in transitively_visible + ] + + +# The capture group is kept as is, followed by a {{.*}} glob +METADATA_FILTERS = [ + r"(\w+ version )[\d.]+ \(git@[\w.:/-]+\.git \w+\)", + r'(!DIFile\(filename: ".+", directory: )".+"', +] +METADATA_FILTERS_RE = [re.compile(s) for s in METADATA_FILTERS] + + +def filter_unstable_metadata(line): + for f in METADATA_FILTERS_RE: + line = f.sub(r"\1{{.*}}", line) + return line + + def add_global_checks( glob_val_dict, comment_marker, @@ -1472,11 +1593,10 @@ def add_global_checks( global_vars_seen_dict, is_analyze, is_before_functions, + global_check_setting, ): printed_prefixes = set() - for nameless_value in ir_nameless_values: - if nameless_value.global_ir_rhs_regexp is None: - continue + for nameless_value in global_nameless_values: if nameless_value.is_before_functions != is_before_functions: continue for p in prefix_list: @@ -1500,19 +1620,26 @@ def add_global_checks( check_lines = [] global_vars_seen_before = [key for key in global_vars_seen.keys()] - for line in glob_val_dict[checkprefix][nameless_value.check_prefix]: + lines = glob_val_dict[checkprefix][nameless_value.check_prefix] + lines = filter_globals_according_to_preference( + lines, global_vars_seen_before, nameless_value, global_check_setting + ) + for line in lines: if _global_value_regex: matched = False for regex in _global_value_regex: - if re.match("^@" + regex + " = ", line): + if re.match("^@" + regex + " = ", line) or re.match( + "^!" 
+ regex + " = ", line + ): matched = True break if not matched: continue - tmp = generalize_check_lines( - [line], is_analyze, set(), global_vars_seen + new_line = generalize_global_check_line( + line, is_analyze, global_vars_seen ) - check_line = "%s %s: %s" % (comment_marker, checkprefix, tmp[0]) + new_line = filter_unstable_metadata(new_line) + check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line) check_lines.append(check_line) if not check_lines: continue @@ -1596,6 +1723,15 @@ def get_autogennote_suffix(parser, args): ): continue value = getattr(args, action.dest) + if action.dest == "check_globals": + default_value = "none" if args.version < 3 else "smart" + if value == default_value: + continue + autogenerated_note_args += action.option_strings[0] + " " + if args.version < 3 and value == "all": + continue + autogenerated_note_args += "%s " % value + continue if action.const is not None: # action stores a constant (usually True/False) # Skip actions with different constant values (this happens with boolean # --foo/--no-foo options) |