diff options
Diffstat (limited to 'llvm/utils/UpdateTestChecks/common.py')
-rw-r--r-- | llvm/utils/UpdateTestChecks/common.py | 146 |
1 files changed, 109 insertions, 37 deletions
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 35b7ba6..5bc478a 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -229,12 +229,12 @@ class function_body(object): def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs): arg_names = set() def drop_arg_names(match): - arg_names.add(match.group(2)) - return match.group(1) + match.group(3) + arg_names.add(match.group(3)) + return match.group(1) + match.group(match.lastindex) def repl_arg_names(match): - if match.group(2) in arg_names: - return match.group(1) + match.group(3) - return match.group(1) + match.group(2) + match.group(3) + if match.group(3) in arg_names: + return match.group(1) + match.group(match.lastindex) + return match.group(1) + match.group(2) + match.group(match.lastindex) if self.attrs != attrs: return False ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig) @@ -297,49 +297,111 @@ def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_too SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*') -# Match things that look at identifiers, but only if they are followed by -# spaces, commas, paren, or end of the string -IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)') - -NAMELESS_PREFIX = "TMP" +# TODO: We should also derive check lines for global, debug, loop declarations, etc.. + +class NamelessValue: + def __init__(self, check_prefix, ir_prefix, ir_regexp): + self.check_prefix = check_prefix + self.ir_prefix = ir_prefix + self.ir_regexp = ir_regexp + +# Description of the different "unnamed" values we match in the IR, e.g., +# (local) ssa values, (debug) metadata, etc. +nameless_values = [ + NamelessValue(r'TMP', r'%', r'[\w.-]+?'), + NamelessValue(r'GLOB', r'@', r'[0-9]+?'), + NamelessValue(r'ATTR', r'#', r'[0-9]+?'), + NamelessValue(r'DBG', r'!dbg !', r'[0-9]+?'), + NamelessValue(r'TBAA', r'!tbaa !', r'[0-9]+?'), + NamelessValue(r'RNG', r'!range !', r'[0-9]+?'), + NamelessValue(r'LOOP', r'!llvm.loop !', r'[0-9]+?'), + NamelessValue(r'META', r'metadata !', r'[0-9]+?'), +] + +# Build the regexp that matches an "IR value". This can be a local variable, +# argument, global, or metadata, anything that is "named". It is important that +# the PREFIX and SUFFIX below only contain a single group, if that changes +# other locations will need adjustment as well. +IR_VALUE_REGEXP_PREFIX = r'(\s+)' +IR_VALUE_REGEXP_STRING = r'' +for nameless_value in nameless_values: + if IR_VALUE_REGEXP_STRING: + IR_VALUE_REGEXP_STRING += '|' + IR_VALUE_REGEXP_STRING += nameless_value.ir_prefix + r'(' + nameless_value.ir_regexp + r')' +IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)' +IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX) + +# The entire match is group 0, the prefix has one group (=1), the entire +# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start. +first_nameless_group_in_ir_value_match = 3 + +# Check a match for IR_VALUE_RE and inspect it to determine if it was a local +# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above. +def get_idx_from_ir_value_match(match): + for i in range(first_nameless_group_in_ir_value_match, match.lastindex): + if match.group(i) is not None: + return i - first_nameless_group_in_ir_value_match + error("Unable to identify the kind of IR value from the match!") + return 0; + +# See get_idx_from_ir_value_match +def get_name_from_ir_value_match(match): + return match.group(get_idx_from_ir_value_match(match) + first_nameless_group_in_ir_value_match) + +# Return the nameless prefix we use for this kind or IR value, see also +# get_idx_from_ir_value_match +def get_nameless_check_prefix_from_ir_value_match(match): + return nameless_values[get_idx_from_ir_value_match(match)].check_prefix + +# Return the IR prefix we use for this kind or IR value, e.g., % for locals, +# see also get_idx_from_ir_value_match +def get_ir_prefix_from_ir_value_match(match): + return nameless_values[get_idx_from_ir_value_match(match)].ir_prefix + +# Return true if this kind or IR value is "local", basically if it matches '%{{.*}}'. +def is_local_ir_value_match(match): + return nameless_values[get_idx_from_ir_value_match(match)].ir_prefix == '%' # Create a FileCheck variable name based on an IR name. -def get_value_name(var): +def get_value_name(var, match): if var.isdigit(): - var = NAMELESS_PREFIX + var + var = get_nameless_check_prefix_from_ir_value_match(match) + var var = var.replace('.', '_') var = var.replace('-', '_') return var.upper() - # Create a FileCheck variable from regex. -def get_value_definition(var): - return '[[' + get_value_name(var) + ':%.*]]' - +def get_value_definition(var, match): + return '[[' + get_value_name(var, match) + ':' + get_ir_prefix_from_ir_value_match(match) + '.*]]' # Use a FileCheck variable. -def get_value_use(var): - return '[[' + get_value_name(var) + ']]' +def get_value_use(var, match): + return '[[' + get_value_name(var, match) + ']]' # Replace IR value defs and uses with FileCheck variables. -def genericize_check_lines(lines, is_analyze, vars_seen): +def genericize_check_lines(lines, is_analyze, vars_seen, global_vars_seen): # This gets called for each match that occurs in # a line. We transform variables we haven't seen # into defs, and variables we have seen into uses. def transform_line_vars(match): - var = match.group(2) - if NAMELESS_PREFIX.lower() in var.lower(): - warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,)) - if var in vars_seen: - rv = get_value_use(var) + pre = get_ir_prefix_from_ir_value_match(match) + var = get_name_from_ir_value_match(match) + for nameless_value in nameless_values: + if re.fullmatch(nameless_value.ir_prefix + r'[0-9]+?', var, re.IGNORECASE): + warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,)) + if (pre, var) in vars_seen or (pre, var) in global_vars_seen: + rv = get_value_use(var, match) else: - vars_seen.add(var) - rv = get_value_definition(var) + if is_local_ir_value_match(match): + vars_seen.add((pre, var)) + else: + global_vars_seen.add((pre, var)) + rv = get_value_definition(var, match) # re.sub replaces the entire regex match # with whatever you return, so we have # to make sure to hand it back everything # including the commas and spaces. - return match.group(1) + rv + match.group(3) + return match.group(1) + rv + match.group(match.lastindex) lines_with_def = [] @@ -348,14 +410,18 @@ def genericize_check_lines(lines, is_analyze, vars_seen): line = line.replace('%.', '%dot') # Ignore any comments, since the check lines will too. scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line) - if is_analyze: - lines[i] = scrubbed_line - else: - lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line) + lines[i] = scrubbed_line + if not is_analyze: + # It can happen that two matches are back-to-back and for some reason sub + # will not replace both of them. For now we work around this by + # substituting until there is no more match. + changed = True + while changed: + (lines[i], changed) = IR_VALUE_RE.subn(transform_line_vars, lines[i], count=1) return lines -def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze): +def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze, global_vars_seen_dict): # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. prefix_exclusions = set() printed_prefixes = [] @@ -389,6 +455,10 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, if len(printed_prefixes) != 0: output_lines.append(comment_marker) + if checkprefix not in global_vars_seen_dict: + global_vars_seen_dict[checkprefix] = set() + global_vars_seen = global_vars_seen_dict[checkprefix] + vars_seen = set() printed_prefixes.append(checkprefix) attrs = str(func_dict[checkprefix][func_name].attrs) @@ -396,7 +466,7 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, if attrs: output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs)) args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig) - args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0] + args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0] if '[[' in args_and_sig: output_lines.append(check_label_format % (checkprefix, func_name, '')) output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig)) @@ -416,7 +486,7 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, # For IR output, change all defs to FileCheck variables, so we're immune # to variable naming fashions. - func_body = genericize_check_lines(func_body, is_analyze, vars_seen) + func_body = genericize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen) # This could be selectively enabled with an optional invocation argument. # Disabled for now: better to check everything. Be safe rather than sorry. @@ -453,16 +523,18 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, break def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, - func_name, preserve_names, function_sig): + func_name, preserve_names, function_sig, global_vars_seen_dict): # Label format is based on IR string. function_def_regex = 'define {{[^@]+}}' if function_sig else '' check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex) add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, - check_label_format, False, preserve_names) + check_label_format, False, preserve_names, global_vars_seen_dict) def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name): check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker) - add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True) + global_vars_see_dict = {} + add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, + check_label_format, False, True, global_vars_seen_dict) def check_prefix(prefix): |