1 files changed, 39 insertions, 247 deletions
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index 8cd200c9..2dad16a 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -29,7 +29,7 @@ Version changelog:
    'none' and 'all'. 'smart' is the default.
 5: Basic block labels are matched by FileCheck expressions
 6: The semantics of TBAA checks has been incorporated in the check lines.
-7: Indent switch-cases correctly.
+7: Indent switch-cases correctly; CHECK-EMPTY instead of skipping blank lines.
 """
 DEFAULT_VERSION = 6
 
@@ -607,7 +607,7 @@ MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
 DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")
 
 IS_DEBUG_RECORD_RE = re.compile(r"^(\s+)#dbg_")
-IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\w+")
+IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\S+")
 
 SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
 SCRUB_WHITESPACE_RE = re.compile(r"(?!^(|  \w))[ \t]+", flags=re.M)
@@ -1123,6 +1123,8 @@ class FunctionTestBuilder:
 ##### Generator of LLVM IR CHECK lines
 
 SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*")
+# Comments to indicate the predecessors of a block in the IR.
+SCRUB_PRED_COMMENT_RE = re.compile(r"\s*; preds = .*")
 SCRUB_IR_FUNC_META_RE = re.compile(r"((?:\!(?!dbg\b)[a-zA-Z_]\w*(?:\s+![0-9]+)?)\s*)+")
 
 # TODO: We should also derive check lines for global, debug, loop declarations, etc..
@@ -1361,7 +1363,7 @@ def make_ir_generalizer(version, no_meta_details):
     ]
 
     prefix = r"(\s*)"
-    suffix = r"([,\s\(\)\}]|\Z)"
+    suffix = r"([,\s\(\)\}\]]|\Z)"
 
     # values = [
     #     nameless_value
@@ -1877,6 +1879,7 @@ def generalize_check_lines(
     *,
     unstable_globals_only=False,
     no_meta_details=False,
+    ignore_all_comments=True,  # If False, only ignore comments of predecessors
 ):
     if unstable_globals_only:
         regexp = ginfo.get_unstable_globals_regexp()
@@ -1904,8 +1907,12 @@ def generalize_check_lines(
                         line,
                     )
                     break
-            # Ignore any comments, since the check lines will too.
-            scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
+            if ignore_all_comments:
+                # Ignore any comments, since the check lines will too.
+                scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line)
+            else:
+                # Ignore comments of predecessors only.
+                scrubbed_line = SCRUB_PRED_COMMENT_RE.sub(r"", line)
             # Ignore the metadata details if check global is none
             if no_meta_details:
                 scrubbed_line = SCRUB_IR_FUNC_META_RE.sub(r"{{.*}}", scrubbed_line)
@@ -2083,6 +2090,7 @@ def add_checks(
     global_tbaa_records_for_prefixes={},
     preserve_names=False,
     original_check_lines: Mapping[str, List[str]] = {},
+    check_inst_comments=True,
 ):
     # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
     prefix_exclusions = set()
@@ -2272,6 +2280,14 @@ def add_checks(
             # For IR output, change all defs to FileCheck variables, so we're immune
             # to variable naming fashions.
             else:
+                if ginfo.get_version() >= 7:
+                    # Record the indices of blank lines in the function body preemptively.
+                    blank_line_indices = {
+                        i for i, line in enumerate(func_body) if line.strip() == ""
+                    }
+                else:
+                    blank_line_indices = set()
+
                 func_body = generalize_check_lines(
                     func_body,
                     ginfo,
@@ -2280,6 +2296,8 @@ def add_checks(
                     global_tbaa_records,
                     preserve_names,
                     original_check_lines=original_check_lines.get(checkprefix),
+                    # IR output might require comments checks, e.g., print-predicate-info, print<memssa>
+                    ignore_all_comments=not check_inst_comments,
                 )
 
                 # This could be selectively enabled with an optional invocation argument.
@@ -2295,12 +2313,22 @@ def add_checks(
 
                 is_blank_line = False
 
-                for func_line in func_body:
+                for idx, func_line in enumerate(func_body):
                     if func_line.strip() == "":
-                        is_blank_line = True
+                        # We should distinguish if the line is a 'fake' blank line generated by
+                        # generalize_check_lines removing comments.
+                        # Fortunately, generalize_check_lines does not change the index of each line,
+                        # we can record the indices of blank lines preemptively.
+                        if idx in blank_line_indices:
+                            output_lines.append(
+                                "{} {}-EMPTY:".format(comment_marker, checkprefix)
+                            )
+                        else:
+                            is_blank_line = True
                         continue
-                    # Do not waste time checking IR comments.
-                    func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)
+                    if not check_inst_comments:
+                        # Do not waste time checking IR comments unless necessary.
+                        func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line)
 
                     # Skip blank lines instead of checking them.
                     if is_blank_line:
@@ -2342,6 +2370,7 @@ def add_ir_checks(
     global_vars_seen_dict,
     global_tbaa_records_for_prefixes,
     is_filtered,
+    check_inst_comments=False,
     original_check_lines={},
 ):
     assert ginfo.is_ir()
@@ -2368,6 +2397,7 @@ def add_ir_checks(
         global_tbaa_records_for_prefixes,
         preserve_names,
         original_check_lines=original_check_lines,
+        check_inst_comments=check_inst_comments,
     )
 
 
@@ -2396,244 +2426,6 @@ def add_analyze_checks(
     )
 
 
-IR_FUNC_NAME_RE = re.compile(
-    r"^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\("
-)
-IR_PREFIX_DATA_RE = re.compile(r"^ *(;|$)")
-MIR_FUNC_NAME_RE = re.compile(r" *name: *(?P<func>[A-Za-z0-9_.-]+)")
-MIR_BODY_BEGIN_RE = re.compile(r" *body: *\|")
-MIR_BASIC_BLOCK_RE = re.compile(r" *bb\.[0-9]+.*:$")
-MIR_PREFIX_DATA_RE = re.compile(r"^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)")
-
-
-def find_mir_functions_with_one_bb(lines, verbose=False):
-    result = []
-    cur_func = None
-    bbs = 0
-    for line in lines:
-        m = MIR_FUNC_NAME_RE.match(line)
-        if m:
-            if bbs == 1:
-                result.append(cur_func)
-            cur_func = m.group("func")
-            bbs = 0
-        m = MIR_BASIC_BLOCK_RE.match(line)
-        if m:
-            bbs += 1
-    if bbs == 1:
-        result.append(cur_func)
-    return result
-
-
-def add_mir_checks_for_function(
-    test,
-    output_lines,
-    run_list,
-    func_dict,
-    func_name,
-    single_bb,
-    print_fixed_stack,
-    first_check_is_next,
-    at_the_function_name,
-):
-    printed_prefixes = set()
-    for run in run_list:
-        for prefix in run[0]:
-            if prefix in printed_prefixes:
-                break
-            if not func_dict[prefix][func_name]:
-                continue
-            if printed_prefixes:
-                # Add some space between different check prefixes.
-                indent = len(output_lines[-1]) - len(output_lines[-1].lstrip(" "))
-                output_lines.append(" " * indent + ";")
-            printed_prefixes.add(prefix)
-            add_mir_check_lines(
-                test,
-                output_lines,
-                prefix,
-                ("@" if at_the_function_name else "") + func_name,
-                single_bb,
-                func_dict[prefix][func_name],
-                print_fixed_stack,
-                first_check_is_next,
-            )
-            break
-        else:
-            warn(
-                "Found conflicting asm for function: {}".format(func_name),
-                test_file=test,
-            )
-    return output_lines
-
-
-def add_mir_check_lines(
-    test,
-    output_lines,
-    prefix,
-    func_name,
-    single_bb,
-    func_info,
-    print_fixed_stack,
-    first_check_is_next,
-):
-    func_body = str(func_info).splitlines()
-    if single_bb:
-        # Don't bother checking the basic block label for a single BB
-        func_body.pop(0)
-
-    if not func_body:
-        warn(
-            "Function has no instructions to check: {}".format(func_name),
-            test_file=test,
-        )
-        return
-
-    first_line = func_body[0]
-    indent = len(first_line) - len(first_line.lstrip(" "))
-    # A check comment, indented the appropriate amount
-    check = "{:>{}}; {}".format("", indent, prefix)
-
-    output_lines.append("{}-LABEL: name: {}".format(check, func_name))
-
-    if print_fixed_stack:
-        output_lines.append("{}: fixedStack:".format(check))
-        for stack_line in func_info.extrascrub.splitlines():
-            filecheck_directive = check + "-NEXT"
-            output_lines.append("{}: {}".format(filecheck_directive, stack_line))
-
-    first_check = not first_check_is_next
-    for func_line in func_body:
-        if not func_line.strip():
-            # The mir printer prints leading whitespace so we can't use CHECK-EMPTY:
-            output_lines.append(check + "-NEXT: {{" + func_line + "$}}")
-            continue
-        filecheck_directive = check if first_check else check + "-NEXT"
-        first_check = False
-        check_line = "{}: {}".format(filecheck_directive, func_line[indent:]).rstrip()
-        output_lines.append(check_line)
-
-
-def should_add_mir_line_to_output(input_line, prefix_set):
-    # Skip any check lines that we're handling as well as comments
-    m = CHECK_RE.match(input_line)
-    if (m and m.group(1) in prefix_set) or input_line.strip() == ";":
-        return False
-    return True
-
-
-def add_mir_checks(
-    input_lines,
-    prefix_set,
-    autogenerated_note,
-    test,
-    run_list,
-    func_dict,
-    print_fixed_stack,
-    first_check_is_next,
-    at_the_function_name,
-):
-    simple_functions = find_mir_functions_with_one_bb(input_lines)
-
-    output_lines = []
-    output_lines.append(autogenerated_note)
-
-    func_name = None
-    state = "toplevel"
-    for input_line in input_lines:
-        if input_line == autogenerated_note:
-            continue
-
-        if state == "toplevel":
-            m = IR_FUNC_NAME_RE.match(input_line)
-            if m:
-                state = "ir function prefix"
-                func_name = m.group("func")
-            if input_line.rstrip("| \r\n") == "---":
-                state = "document"
-            output_lines.append(input_line)
-        elif state == "document":
-            m = MIR_FUNC_NAME_RE.match(input_line)
-            if m:
-                state = "mir function metadata"
-                func_name = m.group("func")
-            if input_line.strip() == "...":
-                state = "toplevel"
-                func_name = None
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-        elif state == "mir function metadata":
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-            m = MIR_BODY_BEGIN_RE.match(input_line)
-            if m:
-                if func_name in simple_functions:
-                    # If there's only one block, put the checks inside it
-                    state = "mir function prefix"
-                    continue
-                state = "mir function body"
-                add_mir_checks_for_function(
-                    test,
-                    output_lines,
-                    run_list,
-                    func_dict,
-                    func_name,
-                    single_bb=False,
-                    print_fixed_stack=print_fixed_stack,
-                    first_check_is_next=first_check_is_next,
-                    at_the_function_name=at_the_function_name,
-                )
-        elif state == "mir function prefix":
-            m = MIR_PREFIX_DATA_RE.match(input_line)
-            if not m:
-                state = "mir function body"
-                add_mir_checks_for_function(
-                    test,
-                    output_lines,
-                    run_list,
-                    func_dict,
-                    func_name,
-                    single_bb=True,
-                    print_fixed_stack=print_fixed_stack,
-                    first_check_is_next=first_check_is_next,
-                    at_the_function_name=at_the_function_name,
-                )
-
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-        elif state == "mir function body":
-            if input_line.strip() == "...":
-                state = "toplevel"
-                func_name = None
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-        elif state == "ir function prefix":
-            m = IR_PREFIX_DATA_RE.match(input_line)
-            if not m:
-                state = "ir function body"
-                add_mir_checks_for_function(
-                    test,
-                    output_lines,
-                    run_list,
-                    func_dict,
-                    func_name,
-                    single_bb=False,
-                    print_fixed_stack=print_fixed_stack,
-                    first_check_is_next=first_check_is_next,
-                    at_the_function_name=at_the_function_name,
-                )
-
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-        elif state == "ir function body":
-            if input_line.strip() == "}":
-                state = "toplevel"
-                func_name = None
-            if should_add_mir_line_to_output(input_line, prefix_set):
-                output_lines.append(input_line)
-    return output_lines
-
-
 def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
     for nameless_value in ginfo.get_nameless_values():
         if nameless_value.global_ir_rhs_regexp is None: