Recommit changes to global checks (#71171)

Recommits the changes from https://reviews.llvm.org/D148216. Explicitly named globals are now matched literally, instead of emitting a capture group for the name. This resolves #70047. Metadata and annotations, on the other hand, are captured and matched by default, since their identifiers are not stable. The reasons for the revert (#63746) have been addressed: the first issue, that of duplicated checkers, has already been resolved in #70050. This PR resolves the second issue listed in #63746, regarding the order of named and unnamed globals. It is fixed by recording the start index of each substring containing a global value, and sorting the checks according to that index before emitting them. As a result, global value checks are emitted in the order they were seen instead of being grouped separately.
author: Henrik G. Olsson <hnrklssn@gmail.com> 2023-11-13 14:45:27 +0100
committer: GitHub <noreply@github.com> 2023-11-13 14:45:27 +0100
commit: e6eda66cbc5ebf424b184506fc6bb27bee3d293f (patch)
tree: 279686a6bb14a0023b9d8680d29ad2e5a2206c25 /llvm/utils/UpdateTestChecks/common.py
parent: 81330286f18fda3da2a69fb2dd12de0ac90115b7 (diff)
download: llvm-e6eda66cbc5ebf424b184506fc6bb27bee3d293f.zip
llvm-e6eda66cbc5ebf424b184506fc6bb27bee3d293f.tar.gz
llvm-e6eda66cbc5ebf424b184506fc6bb27bee3d293f.tar.bz2
1 files changed, 197 insertions, 21 deletions
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index 88b2ccc2..d1fd884 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -26,8 +26,10 @@ Version changelog:
    type/attributes.
 3: Opening parenthesis of function args is kept on the first LABEL line
    in case arguments are split to a separate SAME line.
+4: --check-globals now has a third option ('smart'). The others are now called
+   'none' and 'all'. 'smart' is the default.
 """
-DEFAULT_VERSION = 3
+DEFAULT_VERSION = 4
 
 
 class Regex(object):
@@ -228,6 +230,8 @@ def parse_args(parser, argv):
     _verbose = args.verbose
     _global_value_regex = args.global_value_regex
     _global_hex_value_regex = args.global_hex_value_regex
+    if "check_globals" in args and args.check_globals == "default":
+        args.check_globals = "none" if args.version < 4 else "smart"
     return args
 
 
@@ -332,8 +336,8 @@ def itertests(
                 input_lines = [l.rstrip() for l in f]
             first_line = input_lines[0] if input_lines else ""
             if UTC_AVOID in first_line:
-              warn("Skipping test that must not be autogenerated: " + test)
-              continue
+                warn("Skipping test that must not be autogenerated: " + test)
+                continue
             is_regenerate = UTC_ADVERT in first_line
 
             # If we're generating a new test, set the default version to the latest.
@@ -896,7 +900,9 @@ class NamelessValue:
         *,
         is_before_functions=False,
         is_number=False,
-        replace_number_with_counter=False
+        replace_number_with_counter=False,
+        match_literally=False,
+        interlaced_with_previous=False
     ):
         self.check_prefix = check_prefix
         self.check_key = check_key
@@ -908,6 +914,8 @@ class NamelessValue:
         # Some variable numbers (e.g. MCINST1234) will change based on unrelated
         # modifications to LLVM, replace those with an incrementing counter.
         self.replace_number_with_counter = replace_number_with_counter
+        self.match_literally = match_literally
+        self.interlaced_with_previous = interlaced_with_previous
         self.variable_mapping = {}
 
     # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
@@ -919,9 +927,10 @@ class NamelessValue:
         return self.global_ir_rhs_regexp is not None
 
     # Return the IR prefix and check prefix we use for this kind or IR value,
-    # e.g., (%, TMP) for locals.
+    # e.g., (%, TMP) for locals. If the IR prefix is a regex, return the text
+    # it matched in the IR output (match[0]) instead of the regex itself.
     def get_ir_prefix_from_ir_value_match(self, match):
-        return self.ir_prefix, self.check_prefix
+        return re.search(self.ir_prefix, match[0])[0], self.check_prefix
 
     # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals
     def get_ir_regex_from_ir_value_re_match(self, match):
@@ -990,8 +999,16 @@ ir_nameless_values = [
     NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
     NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
     NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
+    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
     NamelessValue(
-        r"GLOB", "@", r"@", r'[a-zA-Z0-9_$"\\.-]+', r".+", is_before_functions=True
+        r"GLOBNAMED",
+        "@",
+        r"@",
+        r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
+        r".+",
+        is_before_functions=True,
+        match_literally=True,
+        interlaced_with_previous=True,
     ),
     NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
     NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
@@ -1003,6 +1020,19 @@ ir_nameless_values = [
     NamelessValue(r"META", "!", r"metadata ", r"![0-9]+", None),
     NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
     NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
+    NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
+]
+
+global_nameless_values = [  # only the values that define a global_ir_rhs_regexp
+    nameless_value
+    for nameless_value in ir_nameless_values
+    if nameless_value.global_ir_rhs_regexp is not None
+]
+# Subset that is NOT matched literally (match_literally is False); GLOBNAMED is excluded.
+global_nameless_values_w_unstable_ids = [
+    nameless_value
+    for nameless_value in global_nameless_values
+    if not nameless_value.match_literally
 ]
 
 asm_nameless_values = [
@@ -1037,6 +1067,7 @@ analyze_nameless_values = [
     ),
 ]
 
+
 def createOrRegexp(old, new):
     if not old:
         return new
@@ -1060,7 +1091,7 @@ for nameless_value in ir_nameless_values:
     if nameless_value.global_ir_rhs_regexp is not None:
         match = "^" + match
     IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
-IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)]|\Z)"
+IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)\}]|\Z)"
 IR_VALUE_RE = re.compile(
     IR_VALUE_REGEXP_PREFIX
     + r"("
@@ -1069,6 +1100,18 @@ IR_VALUE_RE = re.compile(
     + IR_VALUE_REGEXP_SUFFIX
 )
 
+GLOBAL_VALUE_REGEXP_STRING = r""  # accumulated below, one alternative per nameless value
+for nameless_value in global_nameless_values_w_unstable_ids:  # literally-matched values are not in this list
+    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
+    GLOBAL_VALUE_REGEXP_STRING = createOrRegexp(GLOBAL_VALUE_REGEXP_STRING, match)
+GLOBAL_VALUE_RE = re.compile(  # framed by the same IR_VALUE_REGEXP_PREFIX/SUFFIX as IR_VALUE_RE
+    IR_VALUE_REGEXP_PREFIX
+    + r"("
+    + GLOBAL_VALUE_REGEXP_STRING
+    + r")"
+    + IR_VALUE_REGEXP_SUFFIX
+)
+
 # Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
 ASM_VALUE_REGEXP_STRING = ""
 for nameless_value in asm_nameless_values:
@@ -1101,6 +1144,7 @@ first_nameless_group_in_ir_value_match = 3
 variable_group_in_ir_value_match = 3
 attribute_group_in_ir_value_match = 4
 
+
 # Check a match for IR_VALUE_RE and inspect it to determine if it was a local
 # value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
 def get_idx_from_ir_value_match(match):
@@ -1226,6 +1270,20 @@ def generalize_check_lines(
     )
 
 
+def generalize_global_check_line(line, preserve_names, global_vars_seen):  # generalize a single global check line
+    [new_line] = generalize_check_lines_common(  # unpacking raises unless exactly one line comes back
+        [line],
+        False,
+        set(),  # a fresh, empty set is passed on every call
+        global_vars_seen,
+        global_nameless_values_w_unstable_ids,  # excludes values flagged match_literally
+        GLOBAL_VALUE_RE,
+        False,
+        preserve_names,
+    )
+    return new_line
+
+
 def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
     return generalize_check_lines_common(
         lines,
@@ -1251,6 +1309,7 @@ def generalize_analyze_check_lines(lines, vars_seen, global_vars_seen):
         False,
     )
 
+
 def add_checks(
     output_lines,
     comment_marker,
@@ -1553,7 +1612,7 @@ def add_analyze_checks(
 
 
 def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
-    for nameless_value in itertools.chain(ir_nameless_values, asm_nameless_values):
+    for nameless_value in itertools.chain(global_nameless_values, asm_nameless_values):
         if nameless_value.global_ir_rhs_regexp is None:
             continue
 
@@ -1564,7 +1623,10 @@ def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
         global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M))
         lines = []
         for m in global_ir_value_re.finditer(raw_tool_output):
-            lines.append(m.group(0))
+            # Attach the substring's start index so that CHECK lines
+            # can be sorted properly even if they are matched by different nameless values.
+            # This is relevant for GLOB and GLOBNAMED since they may appear interlaced.
+            lines.append((m.start(), m.group(0)))
 
         for prefix in prefixes:
             if glob_val_dict[prefix] is None:
@@ -1580,6 +1642,86 @@ def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
             glob_val_dict[prefix][nameless_value.check_prefix] = lines
 
 
+def filter_globals_according_to_preference(
+    global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting
+):  # pick which (index, line) global definitions to keep for the "none"/"all"/"smart" setting
+    if global_check_setting == "none":
+        return []
+    if global_check_setting == "all":
+        return global_val_lines_w_index  # the input list itself is returned, not a copy
+    assert global_check_setting == "smart"
+
+    if nameless_value.check_key == "#":
+        # attribute sets are usually better checked by --check-attributes
+        return []
+
+    def extract(line, nv):  # returns (defined name, substrings of the right-hand side matching nv.ir_regexp)
+        p = (
+            "^"
+            + nv.ir_prefix
+            + "("
+            + nv.ir_regexp
+            + ") = ("
+            + nv.global_ir_rhs_regexp
+            + ")"
+        )
+        match = re.match(p, line)  # NOTE(review): assumes line always matches p; a None result would raise below
+        return (match.group(1), re.findall(nv.ir_regexp, match.group(2)))
+
+    transitively_visible = set()  # names reached from global_vars_seen by following references
+    contains_refs_to = {}  # defined name -> list of references found in its definition
+
+    def add(var):  # mark var visible, then recurse into everything its definition references
+        nonlocal transitively_visible
+        nonlocal contains_refs_to
+        if var in transitively_visible:  # already handled; also stops reference cycles
+            return
+        transitively_visible.add(var)
+        if not var in contains_refs_to:  # no definition recorded for var, so nothing to follow
+            return
+        for x in contains_refs_to[var]:
+            add(x)
+
+    for i, line in global_val_lines_w_index:
+        (var, refs) = extract(line, nameless_value)
+        contains_refs_to[var] = refs
+    for var, check_key in global_vars_seen:  # each entry is unpacked as a (var, check_key) pair
+        if check_key != nameless_value.check_key:
+            continue
+        add(var)
+    return [  # input order and the attached indices are preserved
+        (i, line)
+        for i, line in global_val_lines_w_index
+        if extract(line, nameless_value)[0] in transitively_visible
+    ]
+
+
+METADATA_FILTERS = [  # (pattern, replacement) pairs, compiled into METADATA_FILTERS_RE below
+    (
+        r"(?<=\")(\w+ )?(\w+ version )[\d.]+(?: \([^)]+\))?",
+        r"{{.*}}\2{{.*}}",
+    ),  # preface with glob also, to capture optional CLANG_VENDOR
+    (r'(!DIFile\(filename: ".+", directory: )".+"', r"\1{{.*}}"),  # keeps the filename, wildcards the directory
+]
+METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS]  # compiled once at module level
+
+
+def filter_unstable_metadata(line):  # returns line after applying every METADATA_FILTERS_RE substitution
+    for f, replacement in METADATA_FILTERS_RE:
+        line = f.sub(replacement, line)  # each filter runs on the previous filter's output
+    return line
+
+
+def flush_current_checks(output_lines, new_lines_w_index, comment_marker):  # emit buffered (index, line) pairs in index order
+    if not new_lines_w_index:  # empty buffer: no separator is emitted either
+        return
+    output_lines.append(comment_marker + SEPARATOR)
+    new_lines_w_index.sort()  # in-place tuple sort: by index first, then by line text on ties
+    for _, line in new_lines_w_index:
+        output_lines.append(line)
+    new_lines_w_index.clear()  # empties the caller's list in place so it can be refilled
+
+
 def add_global_checks(
     glob_val_dict,
     comment_marker,
@@ -1588,11 +1730,11 @@ def add_global_checks(
     global_vars_seen_dict,
     preserve_names,
     is_before_functions,
+    global_check_setting,
 ):
     printed_prefixes = set()
-    for nameless_value in ir_nameless_values:
-        if nameless_value.global_ir_rhs_regexp is None:
-            continue
+    output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
+    for nameless_value in global_nameless_values:
         if nameless_value.is_before_functions != is_before_functions:
             continue
         for p in prefix_list:
@@ -1616,26 +1758,41 @@ def add_global_checks(
 
                 check_lines = []
                 global_vars_seen_before = [key for key in global_vars_seen.keys()]
-                for line in glob_val_dict[checkprefix][nameless_value.check_prefix]:
+                lines_w_index = glob_val_dict[checkprefix][nameless_value.check_prefix]
+                lines_w_index = filter_globals_according_to_preference(
+                    lines_w_index,
+                    global_vars_seen_before,
+                    nameless_value,
+                    global_check_setting,
+                )
+                for i, line in lines_w_index:
                     if _global_value_regex:
                         matched = False
                         for regex in _global_value_regex:
-                            if re.match("^@" + regex + " = ", line):
+                            if re.match("^@" + regex + " = ", line) or re.match(
+                                "^!" + regex + " = ", line
+                            ):
                                 matched = True
                                 break
                         if not matched:
                             continue
-                    tmp = generalize_check_lines(
-                        [line], False, set(), global_vars_seen, preserve_names
+                    new_line = generalize_global_check_line(
+                        line, preserve_names, global_vars_seen
                     )
-                    check_line = "%s %s: %s" % (comment_marker, checkprefix, tmp[0])
-                    check_lines.append(check_line)
+                    new_line = filter_unstable_metadata(new_line)
+                    check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)
+                    check_lines.append((i, check_line))
                 if not check_lines:
                     continue
 
-                output_lines.append(comment_marker + SEPARATOR)
+                if not checkprefix in output_lines_loc:
+                    output_lines_loc[checkprefix] = []
+                if not nameless_value.interlaced_with_previous:
+                    flush_current_checks(
+                        output_lines, output_lines_loc[checkprefix], comment_marker
+                    )
                 for check_line in check_lines:
-                    output_lines.append(check_line)
+                    output_lines_loc[checkprefix].append(check_line)
 
                 printed_prefixes.add((checkprefix, nameless_value.check_prefix))
 
@@ -1646,6 +1803,16 @@ def add_global_checks(
                 break
 
     if printed_prefixes:
+        for p in prefix_list:
+            if p[0] is None:
+                continue
+            for checkprefix in p[0]:
+                if checkprefix not in output_lines_loc:
+                    continue
+                flush_current_checks(
+                    output_lines, output_lines_loc[checkprefix], comment_marker
+                )
+                break
         output_lines.append(comment_marker + SEPARATOR)
     return printed_prefixes
 
@@ -1712,6 +1879,15 @@ def get_autogennote_suffix(parser, args):
         ):
             continue
         value = getattr(args, action.dest)
+        if action.dest == "check_globals":
+            default_value = "none" if args.version < 4 else "smart"
+            if value == default_value:
+                continue
+            autogenerated_note_args += action.option_strings[0] + " "
+            if args.version < 4 and value == "all":
+                continue
+            autogenerated_note_args += "%s " % value
+            continue
         if action.const is not None:  # action stores a constant (usually True/False)
             # Skip actions with different constant values (this happens with boolean
             # --foo/--no-foo options)
author	Henrik G. Olsson <hnrklssn@gmail.com>	2023-11-13 14:45:27 +0100
committer	GitHub <noreply@github.com>	2023-11-13 14:45:27 +0100
commit	e6eda66cbc5ebf424b184506fc6bb27bee3d293f (patch)
tree	279686a6bb14a0023b9d8680d29ad2e5a2206c25 /llvm/utils/UpdateTestChecks/common.py
parent	81330286f18fda3da2a69fb2dd12de0ac90115b7 (diff)
download	llvm-e6eda66cbc5ebf424b184506fc6bb27bee3d293f.zip llvm-e6eda66cbc5ebf424b184506fc6bb27bee3d293f.tar.gz llvm-e6eda66cbc5ebf424b184506fc6bb27bee3d293f.tar.bz2