author    | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2024-03-08 04:24:28 +0100
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2024-03-08 04:25:07 +0100
commit    | b565126b4dbef0d9f5c9f3ef8f3489ff6581218f (patch)
tree      | 3738e242856c20ceb583f92df2cef3f5e91e95b6 /llvm/utils/UpdateTestChecks/common.py
parent    | 66bd3cd75b32ccfa8d228c200cf4fbf72d49fd1f (diff)
download  | llvm-b565126b4dbef0d9f5c9f3ef8f3489ff6581218f.zip
          | llvm-b565126b4dbef0d9f5c9f3ef8f3489ff6581218f.tar.gz
          | llvm-b565126b4dbef0d9f5c9f3ef8f3489ff6581218f.tar.bz2
Revert "update_test_checks: keep meta variables stable by default"
This reverts commit fb02f9ac84a6151e41aba8f7391edd132a9aaf14.
Looks like some Python version incompatibility, will investigate.
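
[Editor's note] The commit message doesn't name the incompatibility, but a likely culprit is visible in the diff below: the reverted `find_diff_matching` calls `bisect.bisect_left(table, rhs_idx, key=...)`, and the `key` parameter of the `bisect` functions only exists on Python 3.10 and newer. A minimal sketch of a version-portable alternative — the helper names here are hypothetical, not from this commit — keeps the sort keys in a parallel list:

```python
import bisect

# The reverted sweep stores (rhs_idx, candidate_idx) entries in `table`,
# sorted by rhs_idx. On Python < 3.10, bisect_left() has no key= parameter,
# so keep the sort keys in a parallel list instead. (Names are illustrative.)
table = []       # entries: (rhs_idx, candidate_idx)
table_keys = []  # table_keys[i] == table[i][0], kept in lockstep

def find_slot(rhs_idx):
    # Portable stand-in for:
    #   bisect.bisect_left(table, rhs_idx, key=lambda entry: entry[0])
    return bisect.bisect_left(table_keys, rhs_idx)

def store(ti, entry):
    # Mirror every table update into table_keys so the two stay in sync.
    if ti < len(table):
        table[ti] = entry
        table_keys[ti] = entry[0]
    else:
        table.append(entry)
        table_keys.append(entry[0])
```

Mirroring each update into the key list costs O(1) extra work per point, so the sweep's O(M log M) bound would be unaffected.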
Diffstat (limited to 'llvm/utils/UpdateTestChecks/common.py')
-rw-r--r-- | llvm/utils/UpdateTestChecks/common.py | 513
1 file changed, 19 insertions, 494 deletions
```diff
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index f766d54..a3365fe 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -1,8 +1,6 @@
 from __future__ import print_function
 
 import argparse
-import bisect
-import collections
 import copy
 import glob
 import itertools
@@ -12,7 +10,7 @@
 import subprocess
 import sys
 import shlex
 
-from typing import List, Mapping, Set
+from typing import List
 
 ##### Common utilities for update_*test_checks.py
@@ -422,48 +420,6 @@ def should_add_line_to_output(
     return True
 
 
-def collect_original_check_lines(ti: TestInfo, prefix_set: set):
-    """
-    Collect pre-existing check lines into a dictionary `result` which is
-    returned.
-
-    result[func_name][prefix] is filled with a list of right-hand-sides of check
-    lines.
-    """
-    result = {}
-
-    current_function = None
-    for input_line_info in ti.ro_iterlines():
-        input_line = input_line_info.line
-        if current_function is not None:
-            if input_line == "":
-                continue
-            if input_line.lstrip().startswith(";"):
-                m = CHECK_RE.match(input_line)
-                if (
-                    m is not None
-                    and m.group(1) in prefix_set
-                    and m.group(2) not in ["LABEL", "SAME"]
-                ):
-                    if m.group(1) not in current_function:
-                        current_function[m.group(1)] = []
-                    current_function[m.group(1)].append(input_line[m.end() :].strip())
-                    continue
-            current_function = None
-
-        m = IR_FUNCTION_RE.match(input_line)
-        if m is not None:
-            func_name = m.group(1)
-            if ti.args.function is not None and func_name != ti.args.function:
-                # When filtering on a specific function, skip all others.
-                continue
-
-            assert func_name not in result
-            current_function = result[func_name] = {}
-
-    return result
-
-
 # Perform lit-like substitutions
 def getSubstitutions(sourcepath):
     sourcedir = os.path.dirname(sourcepath)
@@ -535,7 +491,7 @@
 RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$")
 CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)")
 PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$")
 CHECK_RE = re.compile(
-    r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:"
+    r"^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:"
 )
 CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:")
@@ -1231,325 +1187,20 @@ def may_clash_with_default_check_prefix_name(check_prefix, var):
     )
 
 
-def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
-    """
-    Find a large ordered matching between strings in lhs and rhs.
-
-    Think of this as finding the *unchanged* lines in a diff, where the entries
-    of lhs and rhs are lines of the files being diffed.
-
-    Returns a list of matched (lhs_idx, rhs_idx) pairs.
-    """
-
-    if not lhs or not rhs:
-        return []
-
-    # Collect matches in reverse order.
-    matches = []
-
-    # First, collect a set of candidate matching edges. We limit this to a
-    # constant multiple of the input size to avoid quadratic runtime.
-    patterns = collections.defaultdict(lambda: ([], []))
-
-    for idx in range(len(lhs)):
-        patterns[lhs[idx]][0].append(idx)
-    for idx in range(len(rhs)):
-        patterns[rhs[idx]][1].append(idx)
-
-    multiple_patterns = []
-
-    candidates = []
-    for pattern in patterns.values():
-        if not pattern[0] or not pattern[1]:
-            continue
-
-        if len(pattern[0]) == len(pattern[1]) == 1:
-            candidates.append((pattern[0][0], pattern[1][0]))
-        else:
-            multiple_patterns.append(pattern)
-
-    multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))
-
-    for pattern in multiple_patterns:
-        if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (
-            len(lhs) + len(rhs)
-        ):
-            break
-        for lhs_idx in pattern[0]:
-            for rhs_idx in pattern[1]:
-                candidates.append((lhs_idx, rhs_idx))
-
-    if not candidates:
-        # The LHS and RHS either share nothing in common, or lines are just too
-        # identical. In that case, let's give up and not match anything.
-        return []
-
-    # Compute a maximal crossing-free matching via an algorithm that is
-    # inspired by a mixture of dynamic programming and line-sweeping in
-    # discrete geometry.
-    #
-    # I would be surprised if this algorithm didn't exist somewhere in the
-    # literature, but I found it without consciously recalling any
-    # references, so you'll have to make do with the explanation below.
-    # Sorry.
-    #
-    # The underlying graph is bipartite:
-    # - nodes on the LHS represent lines in the original check
-    # - nodes on the RHS represent lines in the new (updated) check
-    #
-    # Nodes are implicitly sorted by the corresponding line number.
-    # Edges (unique_matches) are sorted by the line number on the LHS.
-    #
-    # Here's the geometric intuition for the algorithm.
-    #
-    # * Plot the edges as points in the plane, with the original line
-    #   number on the X axis and the updated line number on the Y axis.
-    # * The goal is to find a longest "chain" of points where each point
-    #   is strictly above and to the right of the previous point.
-    # * The algorithm proceeds by sweeping a vertical line from left to
-    #   right.
-    # * The algorithm maintains a table where `table[N]` answers the
-    #   question "What is currently the 'best' way to build a chain of N+1
-    #   points to the left of the vertical line". Here, 'best' means
-    #   that the last point of the chain is a as low as possible (minimal
-    #   Y coordinate).
-    # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
-    #   the last point in the chain and `y` is its Y coordinate
-    # * A key invariant is that the Y values in the table are
-    #   monotonically increasing
-    # * Thanks to these properties, the table can be used to answer the
-    #   question "What is the longest chain that can be built to the left
-    #   of the vertical line using only points below a certain Y value",
-    #   using a binary search over the table.
-    # * The algorithm also builds a backlink structure in which every point
-    #   links back to the previous point on a best (longest) chain ending
-    #   at that point
-    #
-    # The core loop of the algorithm sweeps the line and updates the table
-    # and backlink structure for every point that we cross during the sweep.
-    # Therefore, the algorithm is trivially O(M log M) in the number of
-    # points.
-    candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))
-
-    backlinks = []
-    table = []
-    for _, rhs_idx in candidates:
-        candidate_idx = len(backlinks)
-        ti = bisect.bisect_left(table, rhs_idx, key=lambda entry: entry[0])
-
-        # Update the table to record a best chain ending in the current point.
-        # There always is one, and if any of the previously visited points had
-        # a higher Y coordinate, then there is always a previously recorded best
-        # chain that can be improved upon by using the current point.
-        #
-        # There is only one case where there is some ambiguity. If the
-        # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as
-        # the current point (this can only happen if the same line appeared
-        # multiple times on the LHS), then we could choose to keep the
-        # previously recorded best chain instead. That would bias the algorithm
-        # differently but should have no systematic impact on the quality of the
-        # result.
-        if ti < len(table):
-            table[ti] = (rhs_idx, candidate_idx)
-        else:
-            table.append((rhs_idx, candidate_idx))
-        if ti > 0:
-            backlinks.append(table[ti - 1][1])
-        else:
-            backlinks.append(None)
-
-    # Commit to names in the matching by walking the backlinks. Recursively
-    # attempt to fill in more matches in-betweem.
-    match_idx = table[-1][1]
-    while match_idx is not None:
-        current = candidates[match_idx]
-        matches.append(current)
-        match_idx = backlinks[match_idx]
-
-    matches.reverse()
-    return matches
-
-
-VARIABLE_TAG = "[[@@]]"
-METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
-NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
-
-
-class CheckValueInfo:
-    def __init__(
-        self,
-        nameless_value: NamelessValue,
-        var: str,
-        prefix: str,
-    ):
-        self.nameless_value = nameless_value
-        self.var = var
-        self.prefix = prefix
-
-
-# Represent a check line in a way that allows us to compare check lines while
-# ignoring some or all of the FileCheck variable names.
-class CheckLineInfo:
-    def __init__(self, line, values):
-        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
-        self.line: str = line
-
-        # Information on each FileCheck variable name occurrences in the line
-        self.values: List[CheckValueInfo] = values
-
-    def __repr__(self):
-        return f"CheckLineInfo(line={self.line}, self.values={self.values})"
-
-
-def remap_metavar_names(
-    old_line_infos: List[CheckLineInfo],
-    new_line_infos: List[CheckLineInfo],
-    committed_names: Set[str],
-) -> Mapping[str, str]:
-    """
-    Map all FileCheck variable names that appear in new_line_infos to new
-    FileCheck variable names in an attempt to reduce the diff from old_line_infos
-    to new_line_infos.
-
-    This is done by:
-    * Matching old check lines and new check lines using a diffing algorithm
-      applied after replacing names with wildcards.
-    * Committing to variable names such that the matched lines become equal
-      (without wildcards) if possible
-    * This is done recursively to handle cases where many lines are equal
-      after wildcard replacement
-    """
-    # Initialize uncommitted identity mappings
-    new_mapping = {}
-    for line in new_line_infos:
-        for value in line.values:
-            new_mapping[value.var] = value.var
-
-    # Recursively commit to the identity mapping or find a better one
-    def recurse(old_begin, old_end, new_begin, new_end):
-        if old_begin == old_end or new_begin == new_end:
-            return
-
-        # Find a matching of lines where uncommitted names are replaced
-        # with a placeholder.
-        def diffify_line(line, mapper):
-            values = []
-            for value in line.values:
-                mapped = mapper(value.var)
-                values.append(mapped if mapped in committed_names else "?")
-            return line.line.strip() + " @@@ " + " @ ".join(values)
-
-        lhs_lines = [
-            diffify_line(line, lambda x: x)
-            for line in old_line_infos[old_begin:old_end]
-        ]
-        rhs_lines = [
-            diffify_line(line, lambda x: new_mapping[x])
-            for line in new_line_infos[new_begin:new_end]
-        ]
-
-        candidate_matches = find_diff_matching(lhs_lines, rhs_lines)
-
-        # Apply commits greedily on a match-by-match basis
-        matches = [(-1, -1)]
-        committed_anything = False
-        for lhs_idx, rhs_idx in candidate_matches:
-            lhs_line = old_line_infos[lhs_idx]
-            rhs_line = new_line_infos[rhs_idx]
-
-            local_commits = {}
-
-            for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
-                if new_mapping[rhs_value.var] in committed_names:
-                    # The new value has already been committed. If it was mapped
-                    # to the same name as the original value, we can consider
-                    # committing other values from this line. Otherwise, we
-                    # should ignore this line.
-                    if new_mapping[rhs_value.var] == lhs_value.var:
-                        continue
-                    else:
-                        break
-
-                if rhs_value.var in local_commits:
-                    # Same, but for a possible commit happening on the same line
-                    if local_commits[rhs_value.var] == lhs_value.var:
-                        continue
-                    else:
-                        break
-
-                if lhs_value.var in committed_names:
-                    # We can't map this value because the name we would map it to has already been
-                    # committed for something else. Give up on this line.
-                    break
-
-                local_commits[rhs_value.var] = lhs_value.var
-            else:
-                # No reason not to add any commitments for this line
-                for rhs_var, lhs_var in local_commits.items():
-                    new_mapping[rhs_var] = lhs_var
-                    committed_names.add(lhs_var)
-                    committed_anything = True
-
-                    if (
-                        lhs_var != rhs_var
-                        and lhs_var in new_mapping
-                        and new_mapping[lhs_var] == lhs_var
-                    ):
-                        new_mapping[lhs_var] = "conflict_" + lhs_var
-
-                matches.append((lhs_idx, rhs_idx))
-
-        matches.append((old_end, new_end))
-
-        # Recursively handle sequences between matches
-        if committed_anything:
-            for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]):
-                recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next)
-
-    recurse(0, len(old_line_infos), 0, len(new_line_infos))
-
-    # Commit to remaining names and resolve conflicts
-    for new_name, mapped_name in new_mapping.items():
-        if mapped_name in committed_names:
-            continue
-        if not mapped_name.startswith("conflict_"):
-            assert mapped_name == new_name
-            committed_names.add(mapped_name)
-
-    for new_name, mapped_name in new_mapping.items():
-        if mapped_name in committed_names:
-            continue
-        assert mapped_name.startswith("conflict_")
-
-        m = NUMERIC_SUFFIX_RE.search(new_name)
-        base_name = new_name[: m.start()]
-        suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1
-        while True:
-            candidate = f"{base_name}{suffix}"
-            if candidate not in committed_names:
-                new_mapping[new_name] = candidate
-                committed_names.add(candidate)
-                break
-            suffix += 1
-
-    return new_mapping
-
-
 def generalize_check_lines_common(
     lines,
     is_analyze,
     vars_seen,
     global_vars_seen,
     nameless_values,
-    nameless_value_regex: re.Pattern,
+    nameless_value_regex,
     is_asm,
     preserve_names,
-    original_check_lines=None,
 ):
     # This gets called for each match that occurs in
     # a line. We transform variables we haven't seen
     # into defs, and variables we have seen into uses.
-    def transform_line_vars(match, transform_locals=True):
+    def transform_line_vars(match):
         var = get_name_from_ir_value_match(match)
         nameless_value = get_nameless_value_from_match(match, nameless_values)
         if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
@@ -1559,8 +1210,6 @@ def generalize_check_lines_common(
             )
         key = (var, nameless_value.check_key)
         is_local_def = nameless_value.is_local_def_ir_value()
-        if is_local_def and not transform_locals:
-            return None
         if is_local_def and key in vars_seen:
             rv = nameless_value.get_value_use(var, match)
         elif not is_local_def and key in global_vars_seen:
@@ -1579,15 +1228,13 @@ def generalize_check_lines_common(
         # including the commas and spaces.
         return match.group(1) + rv + match.group(match.lastindex)
 
-    def transform_non_local_line_vars(match):
-        return transform_line_vars(match, False)
-
+    lines_with_def = []
     multiple_braces_re = re.compile(r"({{+)|(}}+)")
 
     def escape_braces(match_obj):
         return '{{' + re.escape(match_obj.group(0)) + '}}'
 
-    if not is_asm and not is_analyze:
-        for i, line in enumerate(lines):
+    for i, line in enumerate(lines):
+        if not is_asm and not is_analyze:
             # An IR variable named '%.' matches the FileCheck regex string.
             line = line.replace("%.", "%dot")
             for regex in _global_hex_value_regex:
@@ -1605,136 +1252,25 @@
             # Ignore any comments, since the check lines will too.
             scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
            lines[i] = scrubbed_line
-
-    if not preserve_names:
-        if is_asm:
-            for i, _ in enumerate(lines):
-                # It can happen that two matches are back-to-back and for some reason sub
-                # will not replace both of them. For now we work around this by
-                # substituting until there is no more match.
-                changed = True
-                while changed:
-                    (lines[i], changed) = nameless_value_regex.subn(
-                        transform_line_vars, lines[i], count=1
-                    )
-        else:
-            # LLVM IR case. Start by handling global meta variables (global IR variables,
-            # metadata, attributes)
-            for i, _ in enumerate(lines):
-                start = 0
-                while True:
-                    m = nameless_value_regex.search(lines[i][start:])
-                    if m is None:
-                        break
-                    start += m.start()
-                    sub = transform_non_local_line_vars(m)
-                    if sub is not None:
-                        lines[i] = (
-                            lines[i][:start] + sub + lines[i][start + len(m.group(0)) :]
-                        )
-                    start += 1
-
-            # Collect information about new check lines and original check lines (if any)
-            new_line_infos = []
-            for line in lines:
-                filtered_line = ""
-                values = []
-                while True:
-                    m = nameless_value_regex.search(line)
-                    if m is None:
-                        filtered_line += line
-                        break
-
-                    var = get_name_from_ir_value_match(m)
-                    nameless_value = get_nameless_value_from_match(m, nameless_values)
-                    var = nameless_value.get_value_name(
-                        var, nameless_value.check_prefix
-                    )
-
-                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
-                    filtered_line += (
-                        line[: m.start()]
-                        + m.group(1)
-                        + VARIABLE_TAG
-                        + m.group(m.lastindex)
-                    )
-                    line = line[m.end() :]
-                    values.append(
-                        CheckValueInfo(
-                            nameless_value=nameless_value,
-                            var=var,
-                            prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[
-                                0
-                            ],
-                        )
-                    )
-                new_line_infos.append(CheckLineInfo(filtered_line, values))
-
-            orig_line_infos = []
-            for line in original_check_lines or []:
-                filtered_line = ""
-                values = []
-                while True:
-                    m = METAVAR_RE.search(line)
-                    if m is None:
-                        filtered_line += line
-                        break
-
-                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
-                    filtered_line += line[: m.start()] + VARIABLE_TAG
-                    line = line[m.end() :]
-                    values.append(
-                        CheckValueInfo(
-                            nameless_value=None,
-                            var=m.group(1),
-                            prefix=None,
-                        )
-                    )
-                orig_line_infos.append(CheckLineInfo(filtered_line, values))
-
-            # Compute the variable name mapping
-            committed_names = set(vars_seen)
-
-            mapping = remap_metavar_names(
-                orig_line_infos, new_line_infos, committed_names
-            )
-
-            for i, line_info in enumerate(new_line_infos):
-                line_template = line_info.line
-                line = ""
-
-                for value in line_info.values:
-                    idx = line_template.find(VARIABLE_TAG)
-                    line += line_template[:idx]
-                    line_template = line_template[idx + len(VARIABLE_TAG) :]
-
-                    key = (mapping[value.var], nameless_value.check_key)
-                    is_local_def = nameless_value.is_local_def_ir_value()
-                    if is_local_def:
-                        if mapping[value.var] in vars_seen:
-                            line += f"[[{mapping[value.var]}]]"
-                        else:
-                            line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]"
-                            vars_seen.add(mapping[value.var])
-                    else:
-                        raise RuntimeError("not implemented")
-
-                line += line_template
-
-                lines[i] = line
-
-    if is_analyze:
-        for i, _ in enumerate(lines):
+        if not preserve_names:
+            # It can happen that two matches are back-to-back and for some reason sub
+            # will not replace both of them. For now we work around this by
+            # substituting until there is no more match.
+            changed = True
+            while changed:
+                (lines[i], changed) = nameless_value_regex.subn(
+                    transform_line_vars, lines[i], count=1
+                )
+        if is_analyze:
             # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
             scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
             lines[i] = scrubbed_line
-
     return lines
 
 
 # Replace IR value defs and uses with FileCheck variables.
 def generalize_check_lines(
-    lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines
+    lines, is_analyze, vars_seen, global_vars_seen, preserve_names
 ):
     return generalize_check_lines_common(
         lines,
@@ -1745,7 +1281,6 @@ def generalize_check_lines(
         IR_VALUE_RE,
         False,
         preserve_names,
-        original_check_lines=original_check_lines,
     )
 
 
@@ -1802,7 +1337,6 @@ def add_checks(
     global_vars_seen_dict,
     is_filtered,
     preserve_names=False,
-    original_check_lines: Mapping[str, List[str]] = {},
 ):
     # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
     prefix_exclusions = set()
@@ -1875,7 +1409,6 @@ def add_checks(
                 vars_seen,
                 global_vars_seen,
                 preserve_names,
-                original_check_lines=[],
             )[0]
             func_name_separator = func_dict[checkprefix][func_name].func_name_separator
             if "[[" in args_and_sig:
@@ -1983,12 +1516,7 @@ def add_checks(
             # to variable naming fashions.
             else:
                 func_body = generalize_check_lines(
-                    func_body,
-                    False,
-                    vars_seen,
-                    global_vars_seen,
-                    preserve_names,
-                    original_check_lines=original_check_lines.get(checkprefix),
+                    func_body, False, vars_seen, global_vars_seen, preserve_names
                 )
 
         # This could be selectively enabled with an optional invocation argument.
@@ -2050,7 +1578,6 @@ def add_ir_checks(
     version,
     global_vars_seen_dict,
    is_filtered,
-    original_check_lines={},
 ):
     # Label format is based on IR string.
     if function_sig and version > 1:
@@ -2075,7 +1602,6 @@ def add_ir_checks(
         global_vars_seen_dict,
         is_filtered,
         preserve_names,
-        original_check_lines=original_check_lines,
    )
 
 
@@ -2364,7 +1890,6 @@ def get_autogennote_suffix(parser, args):
             "llvm_bin",
             "verbose",
             "force_update",
-            "reset_variable_names",
         ):
             continue
         value = getattr(args, action.dest)
```
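
[Editor's note] The heart of the reverted `find_diff_matching` is the sweep its comment block describes: a longest strictly-increasing chain over candidate `(lhs_idx, rhs_idx)` pairs. A self-contained sketch of just that idea — simplified from the reverted code, not a drop-in replacement:

```python
import bisect
from typing import List, Optional, Tuple

def longest_chain(points: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
    """Longest chain of points strictly increasing in both coordinates.

    Simplified restatement of the sweep in the reverted find_diff_matching:
    sort by x with a descending tiebreak on y, then run a classic
    longest-increasing-subsequence sweep over y using binary search.
    """
    points = sorted(points, key=lambda p: (p[0], -p[1]))
    ys: List[int] = []    # ys[n]: smallest last-y over all chains of length n+1
    ends: List[int] = []  # ends[n]: index into points of that chain's last point
    backlinks: List[Optional[int]] = []  # previous point on a best chain, per point
    for i, (_, y) in enumerate(points):
        n = bisect.bisect_left(ys, y)
        if n < len(ys):
            ys[n], ends[n] = y, i
        else:
            ys.append(y)
            ends.append(i)
        backlinks.append(ends[n - 1] if n > 0 else None)
    # Recover one longest chain by walking the backlinks from its last point.
    chain = []
    idx: Optional[int] = ends[-1] if ends else None
    while idx is not None:
        chain.append(points[idx])
        idx = backlinks[idx]
    chain.reverse()
    return chain

# One longest chain here is [(1, 1), (3, 2), (4, 4)].
print(longest_chain([(1, 1), (3, 2), (2, 3), (4, 4)]))
```

Sorting with a descending tiebreak on the second coordinate guarantees that two candidates sharing the same LHS line can never both land on one chain, which is what keeps the resulting matching crossing-free.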