author    | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2024-03-08 04:24:28 +0100
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2024-03-08 04:25:07 +0100
commit    | b565126b4dbef0d9f5c9f3ef8f3489ff6581218f (patch)
tree      | 3738e242856c20ceb583f92df2cef3f5e91e95b6 /llvm/utils/UpdateTestChecks/common.py
parent    | 66bd3cd75b32ccfa8d228c200cf4fbf72d49fd1f (diff)
download  | llvm-b565126b4dbef0d9f5c9f3ef8f3489ff6581218f.zip
          | llvm-b565126b4dbef0d9f5c9f3ef8f3489ff6581218f.tar.gz
          | llvm-b565126b4dbef0d9f5c9f3ef8f3489ff6581218f.tar.bz2
Revert "update_test_checks: keep meta variables stable by default"
This reverts commit fb02f9ac84a6151e41aba8f7391edd132a9aaf14.
Looks like some Python version incompatibility, will investigate.
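
[Editor's note] The commit message doesn't name the incompatibility, but a likely culprit is visible in the diff below: the reverted `find_diff_matching` calls `bisect.bisect_left(table, rhs_idx, key=...)`, and the `key` parameter of the `bisect` functions only exists on Python 3.10 and newer. A minimal sketch of a version-portable alternative — the helper names here are hypothetical, not from this commit — keeps the sort keys in a parallel list:

```python
import bisect

# The reverted sweep stores (rhs_idx, candidate_idx) entries in `table`,
# sorted by rhs_idx. On Python < 3.10, bisect_left() has no key= parameter,
# so keep the sort keys in a parallel list instead. (Names are illustrative.)
table = []       # entries: (rhs_idx, candidate_idx)
table_keys = []  # table_keys[i] == table[i][0], kept in lockstep

def find_slot(rhs_idx):
    # Portable stand-in for:
    #   bisect.bisect_left(table, rhs_idx, key=lambda entry: entry[0])
    return bisect.bisect_left(table_keys, rhs_idx)

def store(ti, entry):
    # Mirror every table update into table_keys so the two stay in sync.
    if ti < len(table):
        table[ti] = entry
        table_keys[ti] = entry[0]
    else:
        table.append(entry)
        table_keys.append(entry[0])
```

Mirroring each update into the key list costs O(1) extra work per point, so the sweep's O(M log M) bound would be unaffected.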
Diffstat (limited to 'llvm/utils/UpdateTestChecks/common.py')
-rw-r--r-- | llvm/utils/UpdateTestChecks/common.py | 513
1 file changed, 19 insertions, 494 deletions
```diff
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index f766d54..a3365fe 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -1,8 +1,6 @@
 from __future__ import print_function
 
 import argparse
-import bisect
-import collections
 import copy
 import glob
 import itertools
@@ -12,7 +10,7 @@
 import subprocess
 import sys
 import shlex
 
-from typing import List, Mapping, Set
+from typing import List
 
 ##### Common utilities for update_*test_checks.py
@@ -422,48 +420,6 @@ def should_add_line_to_output(
     return True
 
 
-def collect_original_check_lines(ti: TestInfo, prefix_set: set):
-    """
-    Collect pre-existing check lines into a dictionary `result` which is
-    returned.
-
-    result[func_name][prefix] is filled with a list of right-hand-sides of check
-    lines.
-    """
-    result = {}
-
-    current_function = None
-    for input_line_info in ti.ro_iterlines():
-        input_line = input_line_info.line
-        if current_function is not None:
-            if input_line == "":
-                continue
-            if input_line.lstrip().startswith(";"):
-                m = CHECK_RE.match(input_line)
-                if (
-                    m is not None
-                    and m.group(1) in prefix_set
-                    and m.group(2) not in ["LABEL", "SAME"]
-                ):
-                    if m.group(1) not in current_function:
-                        current_function[m.group(1)] = []
-                    current_function[m.group(1)].append(input_line[m.end() :].strip())
-                    continue
-            current_function = None
-
-        m = IR_FUNCTION_RE.match(input_line)
-        if m is not None:
-            func_name = m.group(1)
-            if ti.args.function is not None and func_name != ti.args.function:
-                # When filtering on a specific function, skip all others.
-                continue
-
-            assert func_name not in result
-            current_function = result[func_name] = {}
-
-    return result
-
-
 # Perform lit-like substitutions
 def getSubstitutions(sourcepath):
     sourcedir = os.path.dirname(sourcepath)
@@ -535,7 +491,7 @@
 RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
 CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
 PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
 CHECK_RE = re.compile(
-    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
+    r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:"
 )
 CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")
@@ -1231,325 +1187,20 @@ def may_clash_with_default_check_prefix_name(check_prefix, var):
     )
 
 
-def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
-    """
-    Find a large ordered matching between strings in lhs and rhs.
-
-    Think of this as finding the *unchanged* lines in a diff, where the entries
-    of lhs and rhs are lines of the files being diffed.
-
-    Returns a list of matched (lhs_idx, rhs_idx) pairs.
-    """
-
-    if not lhs or not rhs:
-        return []
-
-    # Collect matches in reverse order.
-    matches = []
-
-    # First, collect a set of candidate matching edges. We limit this to a
-    # constant multiple of the input size to avoid quadratic runtime.
-    patterns = collections.defaultdict(lambda: ([], []))
-
-    for idx in range(len(lhs)):
-        patterns[lhs[idx]][0].append(idx)
-    for idx in range(len(rhs)):
-        patterns[rhs[idx]][1].append(idx)
-
-    multiple_patterns = []
-
-    candidates = []
-    for pattern in patterns.values():
-        if not pattern[0] or not pattern[1]:
-            continue
-
-        if len(pattern[0]) == len(pattern[1]) == 1:
-            candidates.append((pattern[0][0], pattern[1][0]))
-        else:
-            multiple_patterns.append(pattern)
-
-    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))
-
-    for pattern in multiple_patterns:
-        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
-            len(lhs) + len(rhs)
-        ):
-            break
-        for lhs_idx in pattern[0]:
-            for rhs_idx in pattern[1]:
-                candidates.append((lhs_idx, rhs_idx))
-
-    if not candidates:
-        # The LHS and RHS either share nothing in common, or lines are just too
-        # identical. In that case, let's give up and not match anything.
-        return []
-
-    # Compute a maximal crossing-free matching via an algorithm that is
-    # inspired by a mixture of dynamic programming and line-sweeping in
-    # discrete geometry.
-    #
-    # I would be surprised if this algorithm didn't exist somewhere in the
-    # literature, but I found it without consciously recalling any
-    # references, so you'll have to make do with the explanation below.
-    # Sorry.
-    #
-    # The underlying graph is bipartite:
-    # - nodes on the LHS represent lines in the original check
-    # - nodes on the RHS represent lines in the new (updated) check
-    #
-    # Nodes are implicitly sorted by the corresponding line number.
-    # Edges (unique_matches) are sorted by the line number on the LHS.
-    #
-    # Here's the geometric intuition for the algorithm.
-    #
-    # * Plot the edges as points in the plane, with the original line
-    #   number on the X axis and the updated line number on the Y axis.
-    # * The goal is to find a longest "chain" of points where each point
-    #   is strictly above and to the right of the previous point.
-    # * The algorithm proceeds by sweeping a vertical line from left to
-    #   right.
-    # * The algorithm maintains a table where `table[N]` answers the
-    #   question "What is currently the 'best' way to build a chain of N+1
-    #   points to the left of the vertical line". Here, 'best' means
-    #   that the last point of the chain is a as low as possible (minimal
-    #   Y coordinate).
-    # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
-    #   the last point in the chain and `y` is its Y coordinate
-    # * A key invariant is that the Y values in the table are
-    #   monotonically increasing
-    # * Thanks to these properties, the table can be used to answer the
-    #   question "What is the longest chain that can be built to the left
-    #   of the vertical line using only points below a certain Y value",
-    #   using a binary search over the table.
-    # * The algorithm also builds a backlink structure in which every point
-    #   links back to the previous point on a best (longest) chain ending
-    #   at that point
-    #
-    # The core loop of the algorithm sweeps the line and updates the table
-    # and backlink structure for every point that we cross during the sweep.
-    # Therefore, the algorithm is trivially O(M log M) in the number of
-    # points.
-    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))
-
-    backlinks = []
-    table = []
-    for _, rhs_idx in candidates:
-        candidate_idx = len(backlinks)
-        ti = bisect.bisect_left(table, rhs_idx, key=lambda entry: entry[0])
-
-        # Update the table to record a best chain ending in the current point.
-        # There always is one, and if any of the previously visited points had
-        # a higher Y coordinate, then there is always a previously recorded best
-        # chain that can be improved upon by using the current point.
-        #
-        # There is only one case where there is some ambiguity. If the
-        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
-        # the current point (this can only happen if the same line appeared
-        # multiple times on the LHS), then we could choose to keep the
-        # previously recorded best chain instead. That would bias the algorithm
-        # differently but should have no systematic impact on the quality of the
-        # result.
-        if ti < len(table):
-            table[ti] = (rhs_idx, candidate_idx)
-        else:
-            table.append((rhs_idx, candidate_idx))
-        if ti > 0:
-            backlinks.append(table[ti - 1][1])
-        else:
-            backlinks.append(None)
-
-    # Commit to names in the matching by walking the backlinks. Recursively
-    # attempt to fill in more matches in-betweem.
-    match_idx = table[-1][1]
-    while match_idx is not None:
-        current = candidates[match_idx]
-        matches.append(current)
-        match_idx = backlinks[match_idx]
-
-    matches.reverse()
-    return matches
-
-
-VARIABLE_TAG = "[[@@]]"
-METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
-NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
-
-
-class CheckValueInfo:
-    def __init__(
-        self,
-        nameless_value: NamelessValue,
-        var: str,
-        prefix: str,
-    ):
-        self.nameless_value = nameless_value
-        self.var = var
-        self.prefix = prefix
-
-
-# Represent a check line in a way that allows us to compare check lines while
-# ignoring some or all of the FileCheck variable names.
-class CheckLineInfo:
-    def __init__(self, line, values):
-        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
-        self.line: str = line
-
-        # Information on each FileCheck variable name occurrences in the line
-        self.values: List[CheckValueInfo] = values
-
-    def __repr__(self):
-        return f"CheckLineInfo(line={self.line}, self.values={self.values})"
-
-
-def remap_metavar_names(
-    old_line_infos: List[CheckLineInfo],
-    new_line_infos: List[CheckLineInfo],
-    committed_names: Set[str],
-) -> Mapping[str, str]:
-    """
-    Map all FileCheck variable names that appear in new_line_infos to new
-    FileCheck variable names in an attempt to reduce the diff from old_line_infos
-    to new_line_infos.
-
-    This is done by:
-    * Matching old check lines and new check lines using a diffing algorithm
-      applied after replacing names with wildcards.
-    * Committing to variable names such that the matched lines become equal
-      (without wildcards) if possible
-    * This is done recursively to handle cases where many lines are equal
-      after wildcard replacement
-    """
-    # Initialize uncommitted identity mappings
-    new_mapping = {}
-    for line in new_line_infos:
-        for value in line.values:
-            new_mapping[value.var] = value.var
-
-    # Recursively commit to the identity mapping or find a better one
-    def recurse(old_begin, old_end, new_begin, new_end):
-        if old_begin == old_end or new_begin == new_end:
-            return
-
-        # Find a matching of lines where uncommitted names are replaced
-        # with a placeholder.
-        def diffify_line(line, mapper):
-            values = []
-            for value in line.values:
-                mapped = mapper(value.var)
-                values.append(mapped if mapped in committed_names else "?")
-            return line.line.strip() + " @@@ " + " @ ".join(values)
-
-        lhs_lines = [
-            diffify_line(line, lambda x: x)
-            for line in old_line_infos[old_begin:old_end]
-        ]
-        rhs_lines = [
-            diffify_line(line, lambda x: new_mapping[x])
-            for line in new_line_infos[new_begin:new_end]
-        ]
-
-        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)
-
-        # Apply commits greedily on a match-by-match basis
-        matches = [(-1, -1)]
-        committed_anything = False
-        for lhs_idx, rhs_idx in candidate_matches:
-            lhs_line = old_line_infos[lhs_idx]
-            rhs_line = new_line_infos[rhs_idx]
-
-            local_commits = {}
-
-            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
-                if new_mapping[rhs_value.var] in committed_names:
-                    # The new value has already been committed. If it was mapped
-                    # to the same name as the original value, we can consider
-                    # committing other values from this line. Otherwise, we
-                    # should ignore this line.
-                    if new_mapping[rhs_value.var] == lhs_value.var:
-                        continue
-                    else:
-                        break
-
-                if rhs_value.var in local_commits:
-                    # Same, but for a possible commit happening on the same line
-                    if local_commits[rhs_value.var] == lhs_value.var:
-                        continue
-                    else:
-                        break
-
-                if lhs_value.var in committed_names:
-                    # We can't map this value because the name we would map it to has already been
-                    # committed for something else. Give up on this line.
-                    break
-
-                local_commits[rhs_value.var] = lhs_value.var
-            else:
-                # No reason not to add any commitments for this line
-                for rhs_var, lhs_var in local_commits.items():
-                    new_mapping[rhs_var] = lhs_var
-                    committed_names.add(lhs_var)
-                    committed_anything = True
-
-                    if (
-                        lhs_var != rhs_var
-                        and lhs_var in new_mapping
-                        and new_mapping[lhs_var] == lhs_var
-                    ):
-                        new_mapping[lhs_var] = "conflict_" + lhs_var
-
-                matches.append((lhs_idx, rhs_idx))
-
-        matches.append((old_end, new_end))
-
-        # Recursively handle sequences between matches
-        if committed_anything:
-            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
-                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)
-
-    recurse(0, len(old_line_infos), 0, len(new_line_infos))
-
-    # Commit to remaining names and resolve conflicts
-    for new_name, mapped_name in new_mapping.items():
-        if mapped_name in committed_names:
-            continue
-        if not mapped_name.startswith("conflict_"):
-            assert mapped_name == new_name
-            committed_names.add(mapped_name)
-
-    for new_name, mapped_name in new_mapping.items():
-        if mapped_name in committed_names:
-            continue
-        assert mapped_name.startswith("conflict_")
-
-        m = NUMERIC_SUFFIX_RE.search(new_name)
-        base_name = new_name[: m.start()]
-        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
-        while True:
-            candidate = f"{base_name}{suffix}"
-            if candidate not in committed_names:
-                new_mapping[new_name] = candidate
-                committed_names.add(candidate)
-                break
-            suffix += 1
-
-    return new_mapping
-
-
 def generalize_check_lines_common(
     lines,
     is_analyze,
     vars_seen,
     global_vars_seen,
     nameless_values,
-    nameless_value_regex: re.Pattern,
+    nameless_value_regex,
     is_asm,
     preserve_names,
-    original_check_lines=None,
 ):
     # This gets called for each match that occurs in
     # a line. We transform variables we haven't seen
     # into defs, and variables we have seen into uses.
-    def transform_line_vars(match, transform_locals=True):
+    def transform_line_vars(match):
         var = get_name_from_ir_value_match(match)
         nameless_value = get_nameless_value_from_match(match, nameless_values)
         if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
@@ -1559,8 +1210,6 @@ def generalize_check_lines_common(
             )
         key = (var, nameless_value.check_key)
         is_local_def = nameless_value.is_local_def_ir_value()
-        if is_local_def and not transform_locals:
-            return None
         if is_local_def and key in vars_seen:
             rv = nameless_value.get_value_use(var, match)
         elif not is_local_def and key in global_vars_seen:
@@ -1579,15 +1228,13 @@ def generalize_check_lines_common(
         # including the commas and spaces.
         return match.group(1) + rv + match.group(match.lastindex)
 
-    def transform_non_local_line_vars(match):
-        return transform_line_vars(match, False)
-
+    lines_with_def = []
     multiple_braces_re = re.compile(r"({{+)|(}}+)")
 
     def escape_braces(match_obj):
         return '{{' + re.escape(match_obj.group(0)) + '}}'
 
-    if not is_asm and not is_analyze:
-        for i, line in enumerate(lines):
+    for i, line in enumerate(lines):
+        if not is_asm and not is_analyze:
             # An IR variable named '%.' matches the FileCheck regex string.
             line = line.replace("%.", "%dot")
             for regex in _global_hex_value_regex:
@@ -1605,136 +1252,25 @@
             # Ignore any comments, since the check lines will too.
             scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line
-
-    if not preserve_names:
-        if is_asm:
-            for i, _ in enumerate(lines):
-                # It can happen that two matches are back-to-back and for some reason sub
-                # will not replace both of them. For now we work around this by
-                # substituting until there is no more match.
-                changed = True
-                while changed:
-                    (lines[i], changed) = nameless_value_regex.subn(
-                        transform_line_vars, lines[i], count=1
-                    )
-        else:
-            # LLVM IR case. Start by handling global meta variables (global IR variables,
-            # metadata, attributes)
-            for i, _ in enumerate(lines):
-                start = 0
-                while True:
-                    m = nameless_value_regex.search(lines[i][start:])
-                    if m is None:
-                        break
-                    start += m.start()
-                    sub = transform_non_local_line_vars(m)
-                    if sub is not None:
-                        lines[i] = (
-                            lines[i][:start] + sub + lines[i][start + len(m.group(0)) :]
-                        )
-                    start += 1
-
-            # Collect information about new check lines and original check lines (if any)
-            new_line_infos = []
-            for line in lines:
-                filtered_line = ""
-                values = []
-                while True:
-                    m = nameless_value_regex.search(line)
-                    if m is None:
-                        filtered_line += line
-                        break
-
-                    var = get_name_from_ir_value_match(m)
-                    nameless_value = get_nameless_value_from_match(m, nameless_values)
-                    var = nameless_value.get_value_name(
-                        var, nameless_value.check_prefix
-                    )
-
-                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
-                    filtered_line += (
-                        line[: m.start()]
-                        + m.group(1)
-                        + VARIABLE_TAG
-                        + m.group(m.lastindex)
-                    )
-                    line = line[m.end() :]
-                    values.append(
-                        CheckValueInfo(
-                            nameless_value=nameless_value,
-                            var=var,
-                            prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[
-                                0
-                            ],
-                        )
-                    )
-                new_line_infos.append(CheckLineInfo(filtered_line, values))
-
-            orig_line_infos = []
-            for line in original_check_lines or []:
-                filtered_line = ""
-                values = []
-                while True:
-                    m = METAVAR_RE.search(line)
-                    if m is None:
-                        filtered_line += line
-                        break
-
-                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
-                    filtered_line += line[: m.start()] + VARIABLE_TAG
-                    line = line[m.end() :]
-                    values.append(
-                        CheckValueInfo(
-                            nameless_value=None,
-                            var=m.group(1),
-                            prefix=None,
-                        )
-                    )
-                orig_line_infos.append(CheckLineInfo(filtered_line, values))
-
-            # Compute the variable name mapping
-            committed_names = set(vars_seen)
-
-            mapping = remap_metavar_names(
-                orig_line_infos, new_line_infos, committed_names
-            )
-
-            for i, line_info in enumerate(new_line_infos):
-                line_template = line_info.line
-                line = ""
-
-                for value in line_info.values:
-                    idx = line_template.find(VARIABLE_TAG)
-                    line += line_template[:idx]
-                    line_template = line_template[idx + len(VARIABLE_TAG) :]
-
-                    key = (mapping[value.var], nameless_value.check_key)
-                    is_local_def = nameless_value.is_local_def_ir_value()
-                    if is_local_def:
-                        if mapping[value.var] in vars_seen:
-                            line += f"[[{mapping[value.var]}]]"
-                        else:
-                            line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]"
-                            vars_seen.add(mapping[value.var])
-                    else:
-                        raise RuntimeError("not implemented")
-
-                line += line_template
-
-                lines[i] = line
-
-    if is_analyze:
-        for i, _ in enumerate(lines):
+        if not preserve_names:
+            # It can happen that two matches are back-to-back and for some reason sub
+            # will not replace both of them. For now we work around this by
+            # substituting until there is no more match.
+            changed = True
+            while changed:
+                (lines[i], changed) = nameless_value_regex.subn(
+                    transform_line_vars, lines[i], count=1
+                )
+        if is_analyze:
             # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
             scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
             lines[i] = scrubbed_line
-
     return lines
 
 
 # Replace IR value defs and uses with FileCheck variables.
 def generalize_check_lines(
-    lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines
+    lines, is_analyze, vars_seen, global_vars_seen, preserve_names
 ):
     return generalize_check_lines_common(
         lines,
@@ -1745,7 +1281,6 @@ def generalize_check_lines(
         IR_VALUE_RE,
         False,
         preserve_names,
-        original_check_lines=original_check_lines,
     )
 
 
@@ -1802,7 +1337,6 @@ def add_checks(
     global_vars_seen_dict,
     is_filtered,
     preserve_names=False,
-    original_check_lines: Mapping[str, List[str]] = {},
 ):
     # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
     prefix_exclusions = set()
@@ -1875,7 +1409,6 @@ def add_checks(
                 vars_seen,
                 global_vars_seen,
                 preserve_names,
-                original_check_lines=[],
             )[0]
             func_name_separator = func_dict[checkprefix][func_name].func_name_separator
             if "[[" in args_and_sig:
@@ -1983,12 +1516,7 @@ def add_checks(
             # to variable naming fashions.
             else:
                 func_body = generalize_check_lines(
-                    func_body,
-                    False,
-                    vars_seen,
-                    global_vars_seen,
-                    preserve_names,
-                    original_check_lines=original_check_lines.get(checkprefix),
+                    func_body, False, vars_seen, global_vars_seen, preserve_names
                 )
 
         # This could be selectively enabled with an optional invocation argument.
@@ -2050,7 +1578,6 @@ def add_ir_checks(
     version,
     global_vars_seen_dict,
    is_filtered,
-    original_check_lines={},
 ):
     # Label format is based on IR string.
     if function_sig and version > 1:
@@ -2075,7 +1602,6 @@ def add_ir_checks(
         global_vars_seen_dict,
         is_filtered,
         preserve_names,
-        original_check_lines=original_check_lines,
    )
 
 
@@ -2364,7 +1890,6 @@ def get_autogennote_suffix(parser, args):
             "llvm_bin",
             "verbose",
             "force_update",
-            "reset_variable_names",
         ):
             continue
         value = getattr(args, action.dest)
```
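
[Editor's note] The heart of the reverted `find_diff_matching` is the sweep its comment block describes: a longest strictly-increasing chain over candidate `(lhs_idx, rhs_idx)` pairs. A self-contained sketch of just that idea — simplified from the reverted code, not a drop-in replacement:

```python
import bisect
from typing import List, Optional, Tuple

def longest_chain(points: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
    """Longest chain of points strictly increasing in both coordinates.

    Simplified restatement of the sweep in the reverted find_diff_matching:
    sort by x with a descending tiebreak on y, then run a classic
    longest-increasing-subsequence sweep over y using binary search.
    """
    points = sorted(points, key=lambda p: (p[0], -p[1]))
    ys: List[int] = []    # ys[n]: smallest last-y over all chains of length n+1
    ends: List[int] = []  # ends[n]: index into points of that chain's last point
    backlinks: List[Optional[int]] = []  # previous point on a best chain, per point
    for i, (_, y) in enumerate(points):
        n = bisect.bisect_left(ys, y)
        if n < len(ys):
            ys[n], ends[n] = y, i
        else:
            ys.append(y)
            ends.append(i)
        backlinks.append(ends[n - 1] if n > 0 else None)
    # Recover one longest chain by walking the backlinks from its last point.
    chain = []
    idx: Optional[int] = ends[-1] if ends else None
    while idx is not None:
        chain.append(points[idx])
        idx = backlinks[idx]
    chain.reverse()
    return chain

# One longest chain here is [(1, 1), (3, 2), (4, 4)].
print(longest_chain([(1, 1), (3, 2), (2, 3), (4, 4)]))
```

Sorting with a descending tiebreak on the second coordinate guarantees that two candidates sharing the same LHS line can never both land on one chain, which is what keeps the resulting matching crossing-free.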