diff options
author | Johannes Doerfert <johannes@jdoerfert.de> | 2020-08-10 13:59:07 -0500 |
---|---|---|
committer | Johannes Doerfert <johannes@jdoerfert.de> | 2020-08-12 01:04:16 -0500 |
commit | 97ce7fd89fcc92d84c1938108388f735d55d372c (patch) | |
tree | e7b23c1da95a0fbacac77b1533d1175fc4504c89 /llvm/utils/UpdateTestChecks/common.py | |
parent | 31e5f7120bdd2f76337686d9d169b1c00e6ee69c (diff) | |
download | llvm-97ce7fd89fcc92d84c1938108388f735d55d372c.zip llvm-97ce7fd89fcc92d84c1938108388f735d55d372c.tar.gz llvm-97ce7fd89fcc92d84c1938108388f735d55d372c.tar.bz2 |
[UpdateTestChecks] Match unnamed values like "@[0-9]+" and "![0-9]+"
With this patch we will match most *uses* of "temporary" named things in
the IR via regular expressions, not their name at creation time. The new
"values" we match are:
- "unnamed" globals: `@[0-9]+`
- debug metadata: `!dbg ![0-9]+`
- loop metadata: `!llvm.loop ![0-9]+`
- tbaa metadata: `!tbaa ![0-9]+`
- range metadata: `!range ![0-9]+`
- generic metadata: `metadata ![0-9]+`
- attribute groups: `#[0-9]+`
We still don't match the declarations but that can be done later. This
patch can introduce churn when existing check lines contain the old
hardcoded versions of the above "values". We can add a flag to opt-out,
or opt-in, if necessary.
Reviewed By: arichardson, MaskRay
Differential Revision: https://reviews.llvm.org/D85099
Diffstat (limited to 'llvm/utils/UpdateTestChecks/common.py')
-rw-r--r-- | llvm/utils/UpdateTestChecks/common.py | 146 |
1 files changed, 109 insertions, 37 deletions
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 35b7ba6..5bc478a 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -229,12 +229,12 @@ class function_body(object): def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs): arg_names = set() def drop_arg_names(match): - arg_names.add(match.group(2)) - return match.group(1) + match.group(3) + arg_names.add(match.group(3)) + return match.group(1) + match.group(match.lastindex) def repl_arg_names(match): - if match.group(2) in arg_names: - return match.group(1) + match.group(3) - return match.group(1) + match.group(2) + match.group(3) + if match.group(3) in arg_names: + return match.group(1) + match.group(match.lastindex) + return match.group(1) + match.group(2) + match.group(match.lastindex) if self.attrs != attrs: return False ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig) @@ -297,49 +297,111 @@ def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_too SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*') -# Match things that look at identifiers, but only if they are followed by -# spaces, commas, paren, or end of the string -IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)') - -NAMELESS_PREFIX = "TMP" +# TODO: We should also derive check lines for global, debug, loop declarations, etc.. + +class NamelessValue: + def __init__(self, check_prefix, ir_prefix, ir_regexp): + self.check_prefix = check_prefix + self.ir_prefix = ir_prefix + self.ir_regexp = ir_regexp + +# Description of the different "unnamed" values we match in the IR, e.g., +# (local) ssa values, (debug) metadata, etc. 
+nameless_values = [ + NamelessValue(r'TMP', r'%', r'[\w.-]+?'), + NamelessValue(r'GLOB', r'@', r'[0-9]+?'), + NamelessValue(r'ATTR', r'#', r'[0-9]+?'), + NamelessValue(r'DBG', r'!dbg !', r'[0-9]+?'), + NamelessValue(r'TBAA', r'!tbaa !', r'[0-9]+?'), + NamelessValue(r'RNG', r'!range !', r'[0-9]+?'), + NamelessValue(r'LOOP', r'!llvm.loop !', r'[0-9]+?'), + NamelessValue(r'META', r'metadata !', r'[0-9]+?'), +] + +# Build the regexp that matches an "IR value". This can be a local variable, +# argument, global, or metadata, anything that is "named". It is important that +# the PREFIX and SUFFIX below only contain a single group, if that changes +# other locations will need adjustment as well. +IR_VALUE_REGEXP_PREFIX = r'(\s+)' +IR_VALUE_REGEXP_STRING = r'' +for nameless_value in nameless_values: + if IR_VALUE_REGEXP_STRING: + IR_VALUE_REGEXP_STRING += '|' + IR_VALUE_REGEXP_STRING += nameless_value.ir_prefix + r'(' + nameless_value.ir_regexp + r')' +IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)' +IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX) + +# The entire match is group 0, the prefix has one group (=1), the entire +# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start. +first_nameless_group_in_ir_value_match = 3 + +# Check a match for IR_VALUE_RE and inspect it to determine if it was a local +# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above. 
+def get_idx_from_ir_value_match(match): + for i in range(first_nameless_group_in_ir_value_match, match.lastindex): + if match.group(i) is not None: + return i - first_nameless_group_in_ir_value_match + error("Unable to identify the kind of IR value from the match!") + return 0; + +# See get_idx_from_ir_value_match +def get_name_from_ir_value_match(match): + return match.group(get_idx_from_ir_value_match(match) + first_nameless_group_in_ir_value_match) + +# Return the nameless prefix we use for this kind or IR value, see also +# get_idx_from_ir_value_match +def get_nameless_check_prefix_from_ir_value_match(match): + return nameless_values[get_idx_from_ir_value_match(match)].check_prefix + +# Return the IR prefix we use for this kind or IR value, e.g., % for locals, +# see also get_idx_from_ir_value_match +def get_ir_prefix_from_ir_value_match(match): + return nameless_values[get_idx_from_ir_value_match(match)].ir_prefix + +# Return true if this kind or IR value is "local", basically if it matches '%{{.*}}'. +def is_local_ir_value_match(match): + return nameless_values[get_idx_from_ir_value_match(match)].ir_prefix == '%' # Create a FileCheck variable name based on an IR name. -def get_value_name(var): +def get_value_name(var, match): if var.isdigit(): - var = NAMELESS_PREFIX + var + var = get_nameless_check_prefix_from_ir_value_match(match) + var var = var.replace('.', '_') var = var.replace('-', '_') return var.upper() - # Create a FileCheck variable from regex. -def get_value_definition(var): - return '[[' + get_value_name(var) + ':%.*]]' - +def get_value_definition(var, match): + return '[[' + get_value_name(var, match) + ':' + get_ir_prefix_from_ir_value_match(match) + '.*]]' # Use a FileCheck variable. -def get_value_use(var): - return '[[' + get_value_name(var) + ']]' +def get_value_use(var, match): + return '[[' + get_value_name(var, match) + ']]' # Replace IR value defs and uses with FileCheck variables. 
-def genericize_check_lines(lines, is_analyze, vars_seen): +def genericize_check_lines(lines, is_analyze, vars_seen, global_vars_seen): # This gets called for each match that occurs in # a line. We transform variables we haven't seen # into defs, and variables we have seen into uses. def transform_line_vars(match): - var = match.group(2) - if NAMELESS_PREFIX.lower() in var.lower(): - warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,)) - if var in vars_seen: - rv = get_value_use(var) + pre = get_ir_prefix_from_ir_value_match(match) + var = get_name_from_ir_value_match(match) + for nameless_value in nameless_values: + if re.fullmatch(nameless_value.ir_prefix + r'[0-9]+?', var, re.IGNORECASE): + warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,)) + if (pre, var) in vars_seen or (pre, var) in global_vars_seen: + rv = get_value_use(var, match) else: - vars_seen.add(var) - rv = get_value_definition(var) + if is_local_ir_value_match(match): + vars_seen.add((pre, var)) + else: + global_vars_seen.add((pre, var)) + rv = get_value_definition(var, match) # re.sub replaces the entire regex match # with whatever you return, so we have # to make sure to hand it back everything # including the commas and spaces. - return match.group(1) + rv + match.group(3) + return match.group(1) + rv + match.group(match.lastindex) lines_with_def = [] @@ -348,14 +410,18 @@ def genericize_check_lines(lines, is_analyze, vars_seen): line = line.replace('%.', '%dot') # Ignore any comments, since the check lines will too. scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line) - if is_analyze: - lines[i] = scrubbed_line - else: - lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line) + lines[i] = scrubbed_line + if not is_analyze: + # It can happen that two matches are back-to-back and for some reason sub + # will not replace both of them. 
For now we work around this by + # substituting until there is no more match. + changed = True + while changed: + (lines[i], changed) = IR_VALUE_RE.subn(transform_line_vars, lines[i], count=1) return lines -def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze): +def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze, global_vars_seen_dict): # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. prefix_exclusions = set() printed_prefixes = [] @@ -389,6 +455,10 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, if len(printed_prefixes) != 0: output_lines.append(comment_marker) + if checkprefix not in global_vars_seen_dict: + global_vars_seen_dict[checkprefix] = set() + global_vars_seen = global_vars_seen_dict[checkprefix] + vars_seen = set() printed_prefixes.append(checkprefix) attrs = str(func_dict[checkprefix][func_name].attrs) @@ -396,7 +466,7 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, if attrs: output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs)) args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig) - args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0] + args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0] if '[[' in args_and_sig: output_lines.append(check_label_format % (checkprefix, func_name, '')) output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig)) @@ -416,7 +486,7 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, # For IR output, change all defs to FileCheck variables, so we're immune # to variable naming fashions. 
- func_body = genericize_check_lines(func_body, is_analyze, vars_seen) + func_body = genericize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen) # This could be selectively enabled with an optional invocation argument. # Disabled for now: better to check everything. Be safe rather than sorry. @@ -453,16 +523,18 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, break def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, - func_name, preserve_names, function_sig): + func_name, preserve_names, function_sig, global_vars_seen_dict): # Label format is based on IR string. function_def_regex = 'define {{[^@]+}}' if function_sig else '' check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex) add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, - check_label_format, False, preserve_names) + check_label_format, False, preserve_names, global_vars_seen_dict) def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name): check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker) - add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True) + global_vars_see_dict = {} + add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, + check_label_format, False, True, global_vars_seen_dict) def check_prefix(prefix): |