path: root/llvm/utils/UpdateTestChecks/common.py
author     Nicolai Hähnle <nicolai.haehnle@amd.com>  2024-03-08 04:24:28 +0100
committer  Nicolai Hähnle <nicolai.haehnle@amd.com>  2024-03-08 04:25:07 +0100
commit     b565126b4dbef0d9f5c9f3ef8f3489ff6581218f (patch)
tree       3738e242856c20ceb583f92df2cef3f5e91e95b6 /llvm/utils/UpdateTestChecks/common.py
parent     66bd3cd75b32ccfa8d228c200cf4fbf72d49fd1f (diff)
Revert "update_test_checks: keep meta variables stable by default"
This reverts commit fb02f9ac84a6151e41aba8f7391edd132a9aaf14. Looks like some Python version incompatibility, will investigate.
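A plausible culprit (an assumption, not confirmed by the commit message): the reverted find_diff_matching calls bisect.bisect_left(table, rhs_idx, key=...), and bisect only accepts a key= parameter on Python 3.10+; older interpreters raise TypeError. A version-agnostic sketch of that lookup (hypothetical helper name):

    import bisect

    def table_bisect_left(table, rhs_idx):
        # table holds (y, candidate_idx) pairs kept sorted by y; bisect a
        # parallel list of keys instead of passing key= to bisect_left.
        return bisect.bisect_left([entry[0] for entry in table], rhs_idx)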
Diffstat (limited to 'llvm/utils/UpdateTestChecks/common.py')
-rw-r--r--  llvm/utils/UpdateTestChecks/common.py  | 513
1 file changed, 19 insertions(+), 494 deletions(-)
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index f766d54..a3365fe 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -1,8 +1,6 @@
from __future__ import print_function
import argparse
-import bisect
-import collections
import copy
import glob
import itertools
@@ -12,7 +10,7 @@ import subprocess
import sys
import shlex
-from typing import List, Mapping, Set
+from typing import List
##### Common utilities for update_*test_checks.py
@@ -422,48 +420,6 @@ def should_add_line_to_output(
return True
-def collect_original_check_lines(ti: TestInfo, prefix_set: set):
- """
- Collect pre-existing check lines into a dictionary `result` which is
- returned.
-
- result[func_name][prefix] is filled with a list of right-hand-sides of check
- lines.
- """
- result = {}
-
- current_function = None
- for input_line_info in ti.ro_iterlines():
- input_line = input_line_info.line
- if current_function is not None:
- if input_line == "":
- continue
- if input_line.lstrip().startswith(";"):
- m = CHECK_RE.match(input_line)
- if (
- m is not None
- and m.group(1) in prefix_set
- and m.group(2) not in ["LABEL", "SAME"]
- ):
- if m.group(1) not in current_function:
- current_function[m.group(1)] = []
- current_function[m.group(1)].append(input_line[m.end() :].strip())
- continue
- current_function = None
-
- m = IR_FUNCTION_RE.match(input_line)
- if m is not None:
- func_name = m.group(1)
- if ti.args.function is not None and func_name != ti.args.function:
- # When filtering on a specific function, skip all others.
- continue
-
- assert func_name not in result
- current_function = result[func_name] = {}
-
- return result
-
-
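For orientation, the mapping built by the removed helper has this shape (hypothetical function name and prefix):

    # result["test_fn"]["CHECK"] == ["%v = add i32 %a, %b",
    #                                "ret i32 %v"]
    # i.e. per function, per check prefix, the right-hand sides of the
    # pre-existing check lines, in source order.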
# Perform lit-like substitutions
def getSubstitutions(sourcepath):
sourcedir = os.path.dirname(sourcepath)
@@ -535,7 +491,7 @@ RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
CHECK_RE = re.compile(
- r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
+ r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:"
)
CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")
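The CHECK_RE change above restores the non-capturing suffix group: the removed collect_original_check_lines was the only consumer of the captured suffix (it read m.group(2) to skip LABEL and SAME lines). A sketch of the difference on a hypothetical input:

    import re
    old_re = re.compile(
        r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
    )
    m = old_re.match("; CHECK-NEXT: ret i32 %v")
    # m.group(1) == "CHECK", m.group(2) == "NEXT"
    # With the restored non-capturing pattern, only group(1) exists.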
@@ -1231,325 +1187,20 @@ def may_clash_with_default_check_prefix_name(check_prefix, var):
)
-def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
- """
- Find a large ordered matching between strings in lhs and rhs.
-
- Think of this as finding the *unchanged* lines in a diff, where the entries
- of lhs and rhs are lines of the files being diffed.
-
- Returns a list of matched (lhs_idx, rhs_idx) pairs.
- """
-
- if not lhs or not rhs:
- return []
-
- # Collect matches in reverse order.
- matches = []
-
- # First, collect a set of candidate matching edges. We limit this to a
- # constant multiple of the input size to avoid quadratic runtime.
- patterns = collections.defaultdict(lambda: ([], []))
-
- for idx in range(len(lhs)):
- patterns[lhs[idx]][0].append(idx)
- for idx in range(len(rhs)):
- patterns[rhs[idx]][1].append(idx)
-
- multiple_patterns = []
-
- candidates = []
- for pattern in patterns.values():
- if not pattern[0] or not pattern[1]:
- continue
-
- if len(pattern[0]) == len(pattern[1]) == 1:
- candidates.append((pattern[0][0], pattern[1][0]))
- else:
- multiple_patterns.append(pattern)
-
- multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))
-
- for pattern in multiple_patterns:
- if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
- len(lhs) + len(rhs)
- ):
- break
- for lhs_idx in pattern[0]:
- for rhs_idx in pattern[1]:
- candidates.append((lhs_idx, rhs_idx))
-
- if not candidates:
- # The LHS and RHS either share nothing in common, or their lines are too
- # repetitive to match within budget. In that case, give up and match nothing.
- return []
-
- # Compute a maximal crossing-free matching via an algorithm that is
- # inspired by a mixture of dynamic programming and line-sweeping in
- # discrete geometry.
- #
- # I would be surprised if this algorithm didn't exist somewhere in the
- # literature, but I found it without consciously recalling any
- # references, so you'll have to make do with the explanation below.
- # Sorry.
- #
- # The underlying graph is bipartite:
- # - nodes on the LHS represent lines in the original check
- # - nodes on the RHS represent lines in the new (updated) check
- #
- # Nodes are implicitly sorted by the corresponding line number.
- # Edges (candidates) are sorted by the line number on the LHS.
- #
- # Here's the geometric intuition for the algorithm.
- #
- # * Plot the edges as points in the plane, with the original line
- # number on the X axis and the updated line number on the Y axis.
- # * The goal is to find a longest "chain" of points where each point
- # is strictly above and to the right of the previous point.
- # * The algorithm proceeds by sweeping a vertical line from left to
- # right.
- # * The algorithm maintains a table where `table[N]` answers the
- # question "What is currently the 'best' way to build a chain of N+1
- # points to the left of the vertical line". Here, 'best' means
- # that the last point of the chain is as low as possible (minimal
- # Y coordinate).
- # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
- # the last point in the chain and `y` is its Y coordinate
- # * A key invariant is that the Y values in the table are
- # monotonically increasing
- # * Thanks to these properties, the table can be used to answer the
- # question "What is the longest chain that can be built to the left
- # of the vertical line using only points below a certain Y value",
- # using a binary search over the table.
- # * The algorithm also builds a backlink structure in which every point
- # links back to the previous point on a best (longest) chain ending
- # at that point
- #
- # The core loop of the algorithm sweeps the line and updates the table
- # and backlink structure for every point that we cross during the sweep.
- # Therefore, the algorithm is trivially O(M log M) in the number of
- # points.
- candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))
-
- backlinks = []
- table = []
- for _, rhs_idx in candidates:
- candidate_idx = len(backlinks)
- ti = bisect.bisect_left(table, rhs_idx, key=lambda entry: entry[0])
-
- # Update the table to record a best chain ending in the current point.
- # There always is one, and if any of the previously visited points had
- # a higher Y coordinate, then there is always a previously recorded best
- # chain that can be improved upon by using the current point.
- #
- # There is only one case where there is some ambiguity. If the
- # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
- # the current point (this can only happen if the same line appeared
- # multiple times on the LHS), then we could choose to keep the
- # previously recorded best chain instead. That would bias the algorithm
- # differently but should have no systematic impact on the quality of the
- # result.
- if ti < len(table):
- table[ti] = (rhs_idx, candidate_idx)
- else:
- table.append((rhs_idx, candidate_idx))
- if ti > 0:
- backlinks.append(table[ti - 1][1])
- else:
- backlinks.append(None)
-
- # Commit to the matching by walking the backlinks. The caller can
- # recursively attempt to fill in more matches in between.
- match_idx = table[-1][1]
- while match_idx is not None:
- current = candidates[match_idx]
- matches.append(current)
- match_idx = backlinks[match_idx]
-
- matches.reverse()
- return matches
-
-
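A sketch of what the removed matcher computes, assuming the definition above:

    lhs = ["x", "y", "z"]
    rhs = ["y", "z", "w"]
    # find_diff_matching(lhs, rhs) would return [(1, 0), (2, 1)]:
    # "y" and "z" are the unchanged lines, and both index components
    # increase along the result (crossing-free). For crossing inputs
    # such as ["a", "b"] vs. ["b", "a"], only one pair can survive.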
-VARIABLE_TAG = "[[@@]]"
-METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
-NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
-
-
-class CheckValueInfo:
- def __init__(
- self,
- nameless_value: NamelessValue,
- var: str,
- prefix: str,
- ):
- self.nameless_value = nameless_value
- self.var = var
- self.prefix = prefix
-
-
-# Represent a check line in a way that allows us to compare check lines while
-# ignoring some or all of the FileCheck variable names.
-class CheckLineInfo:
- def __init__(self, line, values):
- # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
- self.line: str = line
-
- # Information on each FileCheck variable name occurrence in the line
- self.values: List[CheckValueInfo] = values
-
- def __repr__(self):
- return f"CheckLineInfo(line={self.line}, self.values={self.values})"
-
-
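As a sketch of the representation, the original check line "%v = add i32 [[A:%.*]], [[B]]" would be parsed (via METAVAR_RE, in generalize_check_lines_common further below) into roughly:

    CheckLineInfo(
        line="%v = add i32 [[@@]], [[@@]]",  # names replaced by VARIABLE_TAG
        values=[
            CheckValueInfo(nameless_value=None, var="A", prefix=None),
            CheckValueInfo(nameless_value=None, var="B", prefix=None),
        ],
    )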
-def remap_metavar_names(
- old_line_infos: List[CheckLineInfo],
- new_line_infos: List[CheckLineInfo],
- committed_names: Set[str],
-) -> Mapping[str, str]:
- """
- Map all FileCheck variable names that appear in new_line_infos to new
- FileCheck variable names in an attempt to reduce the diff from old_line_infos
- to new_line_infos.
-
- This is done by:
- * Matching old check lines and new check lines using a diffing algorithm
- applied after replacing names with wildcards.
- * Committing to variable names such that the matched lines become equal
- (without wildcards) if possible
- * This is done recursively to handle cases where many lines are equal
- after wildcard replacement
- """
- # Initialize uncommitted identity mappings
- new_mapping = {}
- for line in new_line_infos:
- for value in line.values:
- new_mapping[value.var] = value.var
-
- # Recursively commit to the identity mapping or find a better one
- def recurse(old_begin, old_end, new_begin, new_end):
- if old_begin == old_end or new_begin == new_end:
- return
-
- # Find a matching of lines where uncommitted names are replaced
- # with a placeholder.
- def diffify_line(line, mapper):
- values = []
- for value in line.values:
- mapped = mapper(value.var)
- values.append(mapped if mapped in committed_names else "?")
- return line.line.strip() + " @@@ " + " @ ".join(values)
-
- lhs_lines = [
- diffify_line(line, lambda x: x)
- for line in old_line_infos[old_begin:old_end]
- ]
- rhs_lines = [
- diffify_line(line, lambda x: new_mapping[x])
- for line in new_line_infos[new_begin:new_end]
- ]
-
- candidate_matches = find_diff_matching(lhs_lines, rhs_lines)
-
- # Apply commits greedily on a match-by-match basis
- matches = [(-1, -1)]
- committed_anything = False
- for lhs_idx, rhs_idx in candidate_matches:
- lhs_line = old_line_infos[lhs_idx]
- rhs_line = new_line_infos[rhs_idx]
-
- local_commits = {}
-
- for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
- if new_mapping[rhs_value.var] in committed_names:
- # The new value has already been committed. If it was mapped
- # to the same name as the original value, we can consider
- # committing other values from this line. Otherwise, we
- # should ignore this line.
- if new_mapping[rhs_value.var] == lhs_value.var:
- continue
- else:
- break
-
- if rhs_value.var in local_commits:
- # Same, but for a possible commit happening on the same line
- if local_commits[rhs_value.var] == lhs_value.var:
- continue
- else:
- break
-
- if lhs_value.var in committed_names:
- # We can't map this value because the name we would map it to has already been
- # committed for something else. Give up on this line.
- break
-
- local_commits[rhs_value.var] = lhs_value.var
- else:
- # Nothing prevents committing the mappings collected for this line
- for rhs_var, lhs_var in local_commits.items():
- new_mapping[rhs_var] = lhs_var
- committed_names.add(lhs_var)
- committed_anything = True
-
- if (
- lhs_var != rhs_var
- and lhs_var in new_mapping
- and new_mapping[lhs_var] == lhs_var
- ):
- new_mapping[lhs_var] = "conflict_" + lhs_var
-
- matches.append((lhs_idx, rhs_idx))
-
- matches.append((old_end, new_end))
-
- # Recursively handle sequences between matches
- if committed_anything:
- for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
- recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)
-
- recurse(0, len(old_line_infos), 0, len(new_line_infos))
-
- # Commit to remaining names and resolve conflicts
- for new_name, mapped_name in new_mapping.items():
- if mapped_name in committed_names:
- continue
- if not mapped_name.startswith("conflict_"):
- assert mapped_name == new_name
- committed_names.add(mapped_name)
-
- for new_name, mapped_name in new_mapping.items():
- if mapped_name in committed_names:
- continue
- assert mapped_name.startswith("conflict_")
-
- m = NUMERIC_SUFFIX_RE.search(new_name)
- base_name = new_name[: m.start()]
- suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
- while True:
- candidate = f"{base_name}{suffix}"
- if candidate not in committed_names:
- new_mapping[new_name] = candidate
- committed_names.add(candidate)
- break
- suffix += 1
-
- return new_mapping
-
-
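The conflict-resolution tail above picks the first free numeric suffix. A sketch, assuming committed_names already holds "TMP" and "TMP1" and the variable "TMP" was marked as a conflict:

    # NUMERIC_SUFFIX_RE finds no trailing digits in "TMP", so base_name is
    # "TMP" and the search starts at suffix 1; "TMP1" is taken, so the
    # conflicting variable is renamed to "TMP2".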
def generalize_check_lines_common(
lines,
is_analyze,
vars_seen,
global_vars_seen,
nameless_values,
- nameless_value_regex: re.Pattern,
+ nameless_value_regex,
is_asm,
preserve_names,
- original_check_lines=None,
):
# This gets called for each match that occurs in
# a line. We transform variables we haven't seen
# into defs, and variables we have seen into uses.
- def transform_line_vars(match, transform_locals=True):
+ def transform_line_vars(match):
var = get_name_from_ir_value_match(match)
nameless_value = get_nameless_value_from_match(match, nameless_values)
if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
@@ -1559,8 +1210,6 @@ def generalize_check_lines_common(
)
key = (var, nameless_value.check_key)
is_local_def = nameless_value.is_local_def_ir_value()
- if is_local_def and not transform_locals:
- return None
if is_local_def and key in vars_seen:
rv = nameless_value.get_value_use(var, match)
elif not is_local_def and key in global_vars_seen:
@@ -1579,15 +1228,13 @@ def generalize_check_lines_common(
# including the commas and spaces.
return match.group(1) + rv + match.group(match.lastindex)
- def transform_non_local_line_vars(match):
- return transform_line_vars(match, False)
-
+ lines_with_def = []
multiple_braces_re = re.compile(r"({{+)|(}}+)")
def escape_braces(match_obj):
return '{{' + re.escape(match_obj.group(0)) + '}}'
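For instance, a literal "{{" in the scrubbed output would otherwise open a FileCheck regex block, so it is wrapped in one that matches itself:

    # escape_braces rewrites "{{" as "{{\{\{}}": a FileCheck {{...}}
    # regex block whose re.escape()d body matches the literal braces.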
- if not is_asm and not is_analyze:
- for i, line in enumerate(lines):
+ for i, line in enumerate(lines):
+ if not is_asm and not is_analyze:
# An IR variable named '%.' matches the FileCheck regex string.
line = line.replace("%.", "%dot")
for regex in _global_hex_value_regex:
@@ -1605,136 +1252,25 @@ def generalize_check_lines_common(
# Ignore any comments, since the check lines will too.
scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
lines[i] = scrubbed_line
-
- if not preserve_names:
- if is_asm:
- for i, _ in enumerate(lines):
- # It can happen that two matches are back-to-back and for some reason sub
- # will not replace both of them. For now we work around this by
- # substituting until there is no more match.
- changed = True
- while changed:
- (lines[i], changed) = nameless_value_regex.subn(
- transform_line_vars, lines[i], count=1
- )
- else:
- # LLVM IR case. Start by handling global meta variables (global IR variables,
- # metadata, attributes)
- for i, _ in enumerate(lines):
- start = 0
- while True:
- m = nameless_value_regex.search(lines[i][start:])
- if m is None:
- break
- start += m.start()
- sub = transform_non_local_line_vars(m)
- if sub is not None:
- lines[i] = (
- lines[i][:start] + sub + lines[i][start + len(m.group(0)) :]
- )
- start += 1
-
- # Collect information about new check lines and original check lines (if any)
- new_line_infos = []
- for line in lines:
- filtered_line = ""
- values = []
- while True:
- m = nameless_value_regex.search(line)
- if m is None:
- filtered_line += line
- break
-
- var = get_name_from_ir_value_match(m)
- nameless_value = get_nameless_value_from_match(m, nameless_values)
- var = nameless_value.get_value_name(
- var, nameless_value.check_prefix
- )
-
- # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
- filtered_line += (
- line[: m.start()]
- + m.group(1)
- + VARIABLE_TAG
- + m.group(m.lastindex)
- )
- line = line[m.end() :]
- values.append(
- CheckValueInfo(
- nameless_value=nameless_value,
- var=var,
- prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[
- 0
- ],
- )
- )
- new_line_infos.append(CheckLineInfo(filtered_line, values))
-
- orig_line_infos = []
- for line in original_check_lines or []:
- filtered_line = ""
- values = []
- while True:
- m = METAVAR_RE.search(line)
- if m is None:
- filtered_line += line
- break
-
- # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
- filtered_line += line[: m.start()] + VARIABLE_TAG
- line = line[m.end() :]
- values.append(
- CheckValueInfo(
- nameless_value=None,
- var=m.group(1),
- prefix=None,
- )
- )
- orig_line_infos.append(CheckLineInfo(filtered_line, values))
-
- # Compute the variable name mapping
- committed_names = set(vars_seen)
-
- mapping = remap_metavar_names(
- orig_line_infos, new_line_infos, committed_names
- )
-
- for i, line_info in enumerate(new_line_infos):
- line_template = line_info.line
- line = ""
-
- for value in line_info.values:
- idx = line_template.find(VARIABLE_TAG)
- line += line_template[:idx]
- line_template = line_template[idx + len(VARIABLE_TAG) :]
-
- key = (mapping[value.var], nameless_value.check_key)
- is_local_def = nameless_value.is_local_def_ir_value()
- if is_local_def:
- if mapping[value.var] in vars_seen:
- line += f"[[{mapping[value.var]}]]"
- else:
- line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]"
- vars_seen.add(mapping[value.var])
- else:
- raise RuntimeError("not implemented")
-
- line += line_template
-
- lines[i] = line
-
- if is_analyze:
- for i, _ in enumerate(lines):
+ if not preserve_names:
+ # It can happen that two matches are back-to-back and for some reason sub
+ # will not replace both of them. For now we work around this by
+ # substituting until there is no more match.
+ changed = True
+ while changed:
+ (lines[i], changed) = nameless_value_regex.subn(
+ transform_line_vars, lines[i], count=1
+ )
+ if is_analyze:
# Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
lines[i] = scrubbed_line
-
return lines
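The restored code path has the familiar net effect (a sketch; the exact capture regex comes from the NamelessValue tables):

    # Input IR:             %res = add i32 %a, %b
    # Generalized check:    [[RES:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
    # A later use of %res:  ret i32 [[RES]]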
# Replace IR value defs and uses with FileCheck variables.
def generalize_check_lines(
- lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines
+ lines, is_analyze, vars_seen, global_vars_seen, preserve_names
):
return generalize_check_lines_common(
lines,
@@ -1745,7 +1281,6 @@ def generalize_check_lines(
IR_VALUE_RE,
False,
preserve_names,
- original_check_lines=original_check_lines,
)
@@ -1802,7 +1337,6 @@ def add_checks(
global_vars_seen_dict,
is_filtered,
preserve_names=False,
- original_check_lines: Mapping[str, List[str]] = {},
):
# prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
prefix_exclusions = set()
@@ -1875,7 +1409,6 @@ def add_checks(
vars_seen,
global_vars_seen,
preserve_names,
- original_check_lines=[],
)[0]
func_name_separator = func_dict[checkprefix][func_name].func_name_separator
if "[[" in args_and_sig:
@@ -1983,12 +1516,7 @@ def add_checks(
# to variable naming fashions.
else:
func_body = generalize_check_lines(
- func_body,
- False,
- vars_seen,
- global_vars_seen,
- preserve_names,
- original_check_lines=original_check_lines.get(checkprefix),
+ func_body, False, vars_seen, global_vars_seen, preserve_names
)
# This could be selectively enabled with an optional invocation argument.
@@ -2050,7 +1578,6 @@ def add_ir_checks(
version,
global_vars_seen_dict,
is_filtered,
- original_check_lines={},
):
# Label format is based on IR string.
if function_sig and version > 1:
@@ -2075,7 +1602,6 @@ def add_ir_checks(
global_vars_seen_dict,
is_filtered,
preserve_names,
- original_check_lines=original_check_lines,
)
@@ -2364,7 +1890,6 @@ def get_autogennote_suffix(parser, args):
"llvm_bin",
"verbose",
"force_update",
- "reset_variable_names",
):
continue
value = getattr(args, action.dest)