26 files changed, 1037 insertions, 122 deletions
diff --git a/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
new file mode 100644
index 0000000..3bcbcbb
--- /dev/null
+++ b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s
@@ -0,0 +1,38 @@
+# This test ensures that we query data marker symbols to avoid
+# misidentifying a constant data island symbol as an extra entry point.
+
+# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar
+# RUN: llvm-bolt %t.so -o %t.instr.so
+
+  .text
+  .global _start
+  .type _start, %function
+_start:
+  ret
+
+  .text
+  .global _foo
+  .type _foo, %function
+_foo:
+  cbz x1, _foo_2
+_foo_1:
+  add x1, x2, x0
+  b _foo
+_foo_2:
+  ret
+
+# None of these constant island symbols should be identified as an extra
+# entry point for function `_foo'.
+  .align 4
+_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80
+_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0
+_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0
+
+  .text
+  .global _bar
+  .type _bar, %function
+_bar:
+  ret
+
+  # Dummy relocation to force relocation mode
+  .reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/unsupported-passes.test b/bolt/test/AArch64/unsupported-passes.test
new file mode 100644
index 0000000..886fc1c
--- /dev/null
+++ b/bolt/test/AArch64/unsupported-passes.test
@@ -0,0 +1,8 @@
+// Checks that passes not fully supported on AArch64 are rejected with an error.
+
+// REQUIRES: system-linux,asserts,target=aarch64{{.*}}
+
+RUN: %clang %cflags %p/../Inputs/hello.c -o %t -Wl,-q
+RUN: not llvm-bolt %t -o %t.bolt --frame-opt=all 2>&1 | FileCheck %s
+
+CHECK: BOLT-ERROR: frame-optimizer is supported only on X86
diff --git a/bolt/test/AArch64/validate-secondary-entry-point.s b/bolt/test/AArch64/validate-secondary-entry-point.s
index 0099a0e..3ad6946 100644
--- a/bolt/test/AArch64/validate-secondary-entry-point.s
+++ b/bolt/test/AArch64/validate-secondary-entry-point.s
@@ -1,13 +1,23 @@
 # This test is to verify that BOLT won't take a label pointing to constant
-# island as a secondary entry point (function `_start` doesn't have ELF size
-# set originally) and the function won't otherwise be mistaken as non-simple.
+# island as a secondary entry point. This could happen when a function has no
+# ELF size set (e.g. it comes from assembly code), or when a constant island
+# is referenced by another function discovered during relocation processing.
 
-# RUN: %clang %cflags -pie %s -o %t.so -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo
+# RUN: split-file %s %t
+
+# RUN: %clang %cflags -pie %t/tt.asm -o %t.so \
+# RUN:   -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo
 # RUN: llvm-bolt %t.so -o %t.bolt.so --print-cfg 2>&1 | FileCheck %s
 # CHECK-NOT: BOLT-WARNING: reference in the middle of instruction detected \
 # CHECK-NOT:   function _start at offset 0x{{[0-9a-f]+}}
 # CHECK: Binary Function "_start" after building cfg
 
+# RUN: %clang %cflags -ffunction-sections -shared %t/tt.c %t/ss.c -o %tt.so \
+# RUN:   -Wl,-q -Wl,--init=_start -Wl,--fini=_start \
+# RUN:   -Wl,--version-script=%t/linker_script
+# RUN: llvm-bolt %tt.so -o %tt.bolted.so
+
+;--- tt.asm
   .text
 
   .global _foo
@@ -32,3 +42,31 @@ _bar:
 
   # Dummy relocation to force relocation mode
   .reloc 0, R_AARCH64_NONE
+
+;--- tt.c
+void _start() {}
+
+__attribute__((naked)) void foo() {
+  asm("ldr x16, .L_fnptr\n"
+      "blr x16\n"
+      "ret\n"
+
+      "_rodatx:"
+      ".global _rodatx;"
+      ".quad 0;"
+      ".L_fnptr:"
+      ".quad 0;");
+}
+
+;--- ss.c
+__attribute__((visibility("hidden"))) extern void* _rodatx;
+void* bar() { return &_rodatx; }
+
+;--- linker_script
+{
+global:
+  _start;
+  foo;
+  bar;
+local: *;
+};
diff --git a/bolt/test/AArch64/veneer-lld-abs.s b/bolt/test/AArch64/veneer-lld-abs.s
index b22301d..77d6f0ce2 100644
--- a/bolt/test/AArch64/veneer-lld-abs.s
+++ b/bolt/test/AArch64/veneer-lld-abs.s
@@ -12,7 +12,7 @@
 
 ## Occasionally, we see the linker not generating $d symbols for long veneers
 ## causing BOLT to fail veneer elimination.
-# RUN: llvm-objcopy --remove-symbol-prefix=\$d %t.exe %t.no-marker.exe
+# RUN: llvm-objcopy --remove-symbol-prefix='$d' %t.exe %t.no-marker.exe
 # RUN: llvm-bolt %t.no-marker.exe -o %t.no-marker.bolt \
 # RUN:   2>&1 | FileCheck %s --check-prefix=CHECK-BOLT
 # RUN: llvm-objdump -d -j .text  %t.no-marker.bolt | \
diff --git a/bolt/test/Inputs/multi-func.cpp b/bolt/test/Inputs/multi-func.cpp
new file mode 100644
index 0000000..61c968f
--- /dev/null
+++ b/bolt/test/Inputs/multi-func.cpp
@@ -0,0 +1,24 @@
+#include <iostream>
+
+// Multiple functions to test selective dumping
+int add(int a, int b) { return a + b; }
+
+int multiply(int a, int b) { return a * b; }
+
+int main_helper() {
+  std::cout << "Helper function" << std::endl;
+  return 42;
+}
+
+int main_secondary() { return add(5, 3); }
+
+void other_function() { std::cout << "Other function" << std::endl; }
+
+int main() {
+  int result = add(10, 20);
+  result = multiply(result, 2);
+  main_helper();
+  main_secondary();
+  other_function();
+  return result;
+}
diff --git a/bolt/test/X86/double-jump.test b/bolt/test/X86/double-jump.test
index 424747c..94b1578 100644
--- a/bolt/test/X86/double-jump.test
+++ b/bolt/test/X86/double-jump.test
@@ -1,15 +1,11 @@
 ## Test the double jump removal peephole.
 
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. subshell execution
-REQUIRES: shell
-
 RUN: %clangxx %cxxflags %p/Inputs/double_jump.cpp -o %t.exe
-RUN: (llvm-bolt %t.exe --peepholes=double-jumps \
-RUN:   --eliminate-unreachable -o %t 2>&1 \
-RUN:   && llvm-objdump -d %t --print-imm-hex --no-show-raw-insn) | FileCheck %s
+RUN: llvm-bolt %t.exe --peepholes=double-jumps \
+RUN:   --eliminate-unreachable -o %t | FileCheck --check-prefix CHECK-BOLT %s
+RUN: llvm-objdump -d %t --print-imm-hex --no-show-raw-insn | FileCheck %s
 
-CHECK: BOLT-INFO: Peephole: 1 double jumps patched.
+CHECK-BOLT: BOLT-INFO: Peephole: 1 double jumps patched.
 
 CHECK: <_Z3foom>:
 CHECK-NEXT: pushq %rbp
diff --git a/bolt/test/X86/dwarf5-debug-line-print.s b/bolt/test/X86/dwarf5-debug-line-print.s
new file mode 100644
index 0000000..b0a5bab
--- /dev/null
+++ b/bolt/test/X86/dwarf5-debug-line-print.s
@@ -0,0 +1,148 @@
+# REQUIRES: system-linux
+
+## Check that BOLT correctly prints debug line comments for DWARF-5.
+
+
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o
+# RUN: %clang %cflags -dwarf-5 %t1.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
+# RUN:   --print-after-lowering -o %t.bolt | FileCheck %s
+
+# CHECK: xorq    %rdi, %rdi # debug line main.c:2:5
+
+# __attribute__((naked)) void _start() {
+#     __asm__(
+#         "xor %rdi, %rdi\n"   // exit code 0
+#         "mov $60, %rax\n"    // syscall number for exit
+#         "syscall\n"
+#     );
+# }
+
+        .file   "main.c"
+        .text
+        .globl  _start                          # -- Begin function _start
+        .p2align        4
+        .type   _start,@function
+_start:                                 # @_start
+.Lfunc_begin0:
+        .file   0 "/home/gpastukhov/tmp2" "main.c" md5 0x94c0e54a615c2a21415ddb904991abd8
+        .cfi_startproc
+# %bb.0:
+        .loc    0 2 5 prologue_end              # main.c:2:5
+        #APP
+        xorq    %rdi, %rdi
+        movq    $60, %rax
+        syscall
+
+        #NO_APP
+.Ltmp0:
+.Lfunc_end0:
+        .size   _start, .Lfunc_end0-_start
+        .cfi_endproc
+                                        # -- End function
+        .section        .debug_abbrev,"",@progbits
+        .byte   1                               # Abbreviation Code
+        .byte   17                              # DW_TAG_compile_unit
+        .byte   1                               # DW_CHILDREN_yes
+        .byte   37                              # DW_AT_producer
+        .byte   37                              # DW_FORM_strx1
+        .byte   19                              # DW_AT_language
+        .byte   5                               # DW_FORM_data2
+        .byte   3                               # DW_AT_name
+        .byte   37                              # DW_FORM_strx1
+        .byte   114                             # DW_AT_str_offsets_base
+        .byte   23                              # DW_FORM_sec_offset
+        .byte   16                              # DW_AT_stmt_list
+        .byte   23                              # DW_FORM_sec_offset
+        .byte   27                              # DW_AT_comp_dir
+        .byte   37                              # DW_FORM_strx1
+        .byte   17                              # DW_AT_low_pc
+        .byte   27                              # DW_FORM_addrx
+        .byte   18                              # DW_AT_high_pc
+        .byte   6                               # DW_FORM_data4
+        .byte   115                             # DW_AT_addr_base
+        .byte   23                              # DW_FORM_sec_offset
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   2                               # Abbreviation Code
+        .byte   46                              # DW_TAG_subprogram
+        .byte   0                               # DW_CHILDREN_no
+        .byte   17                              # DW_AT_low_pc
+        .byte   27                              # DW_FORM_addrx
+        .byte   18                              # DW_AT_high_pc
+        .byte   6                               # DW_FORM_data4
+        .byte   64                              # DW_AT_frame_base
+        .byte   24                              # DW_FORM_exprloc
+        .byte   3                               # DW_AT_name
+        .byte   37                              # DW_FORM_strx1
+        .byte   58                              # DW_AT_decl_file
+        .byte   11                              # DW_FORM_data1
+        .byte   59                              # DW_AT_decl_line
+        .byte   11                              # DW_FORM_data1
+        .byte   63                              # DW_AT_external
+        .byte   25                              # DW_FORM_flag_present
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   0                               # EOM(3)
+        .section        .debug_info,"",@progbits
+.Lcu_begin0:
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  5                               # DWARF version number
+        .byte   1                               # DWARF Unit Type
+        .byte   8                               # Address Size (in bytes)
+        .long   .debug_abbrev                   # Offset Into Abbrev. Section
+        .byte   1                               # Abbrev [1] 0xc:0x23 DW_TAG_compile_unit
+        .byte   0                               # DW_AT_producer
+        .short  29                              # DW_AT_language
+        .byte   1                               # DW_AT_name
+        .long   .Lstr_offsets_base0             # DW_AT_str_offsets_base
+        .long   .Lline_table_start0             # DW_AT_stmt_list
+        .byte   2                               # DW_AT_comp_dir
+        .byte   0                               # DW_AT_low_pc
+        .long   .Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+        .long   .Laddr_table_base0              # DW_AT_addr_base
+        .byte   2                               # Abbrev [2] 0x23:0xb DW_TAG_subprogram
+        .byte   0                               # DW_AT_low_pc
+        .long   .Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+        .byte   1                               # DW_AT_frame_base
+        .byte   87
+        .byte   3                               # DW_AT_name
+        .byte   0                               # DW_AT_decl_file
+        .byte   1                               # DW_AT_decl_line
+                                        # DW_AT_external
+        .byte   0                               # End Of Children Mark
+.Ldebug_info_end0:
+        .section        .debug_str_offsets,"",@progbits
+        .long   20                              # Length of String Offsets Set
+        .short  5
+        .short  0
+.Lstr_offsets_base0:
+        .section        .debug_str,"MS",@progbits,1
+.Linfo_string0:
+        .asciz  "clang version 20.1.8 (CentOS 20.1.8-1.el9)" # string offset=0
+.Linfo_string1:
+        .asciz  "main.c"                        # string offset=43
+.Linfo_string2:
+        .asciz  "/home/gpastukhov/tmp2"         # string offset=50
+.Linfo_string3:
+        .asciz  "_start"                        # string offset=72
+        .section        .debug_str_offsets,"",@progbits
+        .long   .Linfo_string0
+        .long   .Linfo_string1
+        .long   .Linfo_string2
+        .long   .Linfo_string3
+        .section        .debug_addr,"",@progbits
+        .long   .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+        .short  5                               # DWARF version number
+        .byte   8                               # Address size
+        .byte   0                               # Segment selector size
+.Laddr_table_base0:
+        .quad   .Lfunc_begin0
+.Ldebug_addr_end0:
+        .ident  "clang version 20.1.8 (CentOS 20.1.8-1.el9)"
+        .section        ".note.GNU-stack","",@progbits
+        .addrsig
+        .section        .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/X86/dwarf5-two-cus.s b/bolt/test/X86/dwarf5-two-cus.s
new file mode 100644
index 0000000..8b5afb4
--- /dev/null
+++ b/bolt/test/X86/dwarf5-two-cus.s
@@ -0,0 +1,251 @@
+## Check that BOLT correctly handles two CUs with DWARF-5 debug info (does not crash), when
+## a function from one CU is forced to be inlined into another.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t-main.o
+# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5_helper.s -o %thelper.o
+# RUN: %clang %cflags -gdwarf-5 -Wl,-q %t-main.o %thelper.o -o %t.exe
+# RUN: llvm-bolt %t.exe --update-debug-sections --force-inline=_Z3fooi \
+# RUN:   -o %t.bolt | FileCheck %s
+
+# CHECK-NOT: BOLT-ERROR
+# CHECK-NOT: BOLT-WARNING
+# CHECK: BOLT-INFO: inlined {{[0-9]+}} calls at {{[1-9][0-9]*}} call sites
+
+# extern int foo(int);
+# int main(){
+#     foo(10);
+#     return 0;
+# }
+        .file   "main.cpp"
+        .text
+        .globl  main                            # -- Begin function main
+        .p2align        4
+        .type   main,@function
+main:                                   # @main
+.Lfunc_begin0:
+        .file   0 "/home/gpastukhov/tmp2" "main.cpp" md5 0x5c930f5d3a068b09fd18ece59c58bdcf
+        .loc    0 2 0                           # main.cpp:2:0
+        .cfi_startproc
+# %bb.0:
+        pushq   %rax
+        .cfi_def_cfa_offset 16
+.Ltmp0:
+        .loc    0 3 5 prologue_end              # main.cpp:3:5
+        movl    $10, %edi
+        callq   _Z3fooi
+.Ltmp1:
+        .loc    0 4 5                           # main.cpp:4:5
+        xorl    %eax, %eax
+        .loc    0 4 5 epilogue_begin is_stmt 0  # main.cpp:4:5
+        popq    %rcx
+        .cfi_def_cfa_offset 8
+        retq
+.Ltmp2:
+.Lfunc_end0:
+        .size   main, .Lfunc_end0-main
+        .cfi_endproc
+                                        # -- End function
+        .section        .debug_abbrev,"",@progbits
+        .byte   1                               # Abbreviation Code
+        .byte   17                              # DW_TAG_compile_unit
+        .byte   1                               # DW_CHILDREN_yes
+        .byte   37                              # DW_AT_producer
+        .byte   37                              # DW_FORM_strx1
+        .byte   19                              # DW_AT_language
+        .byte   5                               # DW_FORM_data2
+        .byte   3                               # DW_AT_name
+        .byte   37                              # DW_FORM_strx1
+        .byte   114                             # DW_AT_str_offsets_base
+        .byte   23                              # DW_FORM_sec_offset
+        .byte   16                              # DW_AT_stmt_list
+        .byte   23                              # DW_FORM_sec_offset
+        .byte   27                              # DW_AT_comp_dir
+        .byte   37                              # DW_FORM_strx1
+        .byte   17                              # DW_AT_low_pc
+        .byte   27                              # DW_FORM_addrx
+        .byte   18                              # DW_AT_high_pc
+        .byte   6                               # DW_FORM_data4
+        .byte   115                             # DW_AT_addr_base
+        .byte   23                              # DW_FORM_sec_offset
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   2                               # Abbreviation Code
+        .byte   46                              # DW_TAG_subprogram
+        .byte   1                               # DW_CHILDREN_yes
+        .byte   17                              # DW_AT_low_pc
+        .byte   27                              # DW_FORM_addrx
+        .byte   18                              # DW_AT_high_pc
+        .byte   6                               # DW_FORM_data4
+        .byte   64                              # DW_AT_frame_base
+        .byte   24                              # DW_FORM_exprloc
+        .byte   122                             # DW_AT_call_all_calls
+        .byte   25                              # DW_FORM_flag_present
+        .byte   3                               # DW_AT_name
+        .byte   37                              # DW_FORM_strx1
+        .byte   58                              # DW_AT_decl_file
+        .byte   11                              # DW_FORM_data1
+        .byte   59                              # DW_AT_decl_line
+        .byte   11                              # DW_FORM_data1
+        .byte   73                              # DW_AT_type
+        .byte   19                              # DW_FORM_ref4
+        .byte   63                              # DW_AT_external
+        .byte   25                              # DW_FORM_flag_present
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   3                               # Abbreviation Code
+        .byte   72                              # DW_TAG_call_site
+        .byte   1                               # DW_CHILDREN_yes
+        .byte   127                             # DW_AT_call_origin
+        .byte   19                              # DW_FORM_ref4
+        .byte   125                             # DW_AT_call_return_pc
+        .byte   27                              # DW_FORM_addrx
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   4                               # Abbreviation Code
+        .byte   73                              # DW_TAG_call_site_parameter
+        .byte   0                               # DW_CHILDREN_no
+        .byte   2                               # DW_AT_location
+        .byte   24                              # DW_FORM_exprloc
+        .byte   126                             # DW_AT_call_value
+        .byte   24                              # DW_FORM_exprloc
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   5                               # Abbreviation Code
+        .byte   46                              # DW_TAG_subprogram
+        .byte   1                               # DW_CHILDREN_yes
+        .byte   110                             # DW_AT_linkage_name
+        .byte   37                              # DW_FORM_strx1
+        .byte   3                               # DW_AT_name
+        .byte   37                              # DW_FORM_strx1
+        .byte   58                              # DW_AT_decl_file
+        .byte   11                              # DW_FORM_data1
+        .byte   59                              # DW_AT_decl_line
+        .byte   11                              # DW_FORM_data1
+        .byte   73                              # DW_AT_type
+        .byte   19                              # DW_FORM_ref4
+        .byte   60                              # DW_AT_declaration
+        .byte   25                              # DW_FORM_flag_present
+        .byte   63                              # DW_AT_external
+        .byte   25                              # DW_FORM_flag_present
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   6                               # Abbreviation Code
+        .byte   5                               # DW_TAG_formal_parameter
+        .byte   0                               # DW_CHILDREN_no
+        .byte   73                              # DW_AT_type
+        .byte   19                              # DW_FORM_ref4
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   7                               # Abbreviation Code
+        .byte   36                              # DW_TAG_base_type
+        .byte   0                               # DW_CHILDREN_no
+        .byte   3                               # DW_AT_name
+        .byte   37                              # DW_FORM_strx1
+        .byte   62                              # DW_AT_encoding
+        .byte   11                              # DW_FORM_data1
+        .byte   11                              # DW_AT_byte_size
+        .byte   11                              # DW_FORM_data1
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   0                               # EOM(3)
+        .section        .debug_info,"",@progbits
+.Lcu_begin0:
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  5                               # DWARF version number
+        .byte   1                               # DWARF Unit Type
+        .byte   8                               # Address Size (in bytes)
+        .long   .debug_abbrev                   # Offset Into Abbrev. Section
+        .byte   1                               # Abbrev [1] 0xc:0x47 DW_TAG_compile_unit
+        .byte   0                               # DW_AT_producer
+        .short  33                              # DW_AT_language
+        .byte   1                               # DW_AT_name
+        .long   .Lstr_offsets_base0             # DW_AT_str_offsets_base
+        .long   .Lline_table_start0             # DW_AT_stmt_list
+        .byte   2                               # DW_AT_comp_dir
+        .byte   0                               # DW_AT_low_pc
+        .long   .Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+        .long   .Laddr_table_base0              # DW_AT_addr_base
+        .byte   2                               # Abbrev [2] 0x23:0x1c DW_TAG_subprogram
+        .byte   0                               # DW_AT_low_pc
+        .long   .Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+        .byte   1                               # DW_AT_frame_base
+        .byte   87
+                                        # DW_AT_call_all_calls
+        .byte   6                               # DW_AT_name
+        .byte   0                               # DW_AT_decl_file
+        .byte   2                               # DW_AT_decl_line
+        .long   78                              # DW_AT_type
+                                        # DW_AT_external
+        .byte   3                               # Abbrev [3] 0x32:0xc DW_TAG_call_site
+        .long   63                              # DW_AT_call_origin
+        .byte   1                               # DW_AT_call_return_pc
+        .byte   4                               # Abbrev [4] 0x38:0x5 DW_TAG_call_site_parameter
+        .byte   1                               # DW_AT_location
+        .byte   85
+        .byte   1                               # DW_AT_call_value
+        .byte   58
+        .byte   0                               # End Of Children Mark
+        .byte   0                               # End Of Children Mark
+        .byte   5                               # Abbrev [5] 0x3f:0xf DW_TAG_subprogram
+        .byte   3                               # DW_AT_linkage_name
+        .byte   4                               # DW_AT_name
+        .byte   0                               # DW_AT_decl_file
+        .byte   1                               # DW_AT_decl_line
+        .long   78                              # DW_AT_type
+                                        # DW_AT_declaration
+                                        # DW_AT_external
+        .byte   6                               # Abbrev [6] 0x48:0x5 DW_TAG_formal_parameter
+        .long   78                              # DW_AT_type
+        .byte   0                               # End Of Children Mark
+        .byte   7                               # Abbrev [7] 0x4e:0x4 DW_TAG_base_type
+        .byte   5                               # DW_AT_name
+        .byte   5                               # DW_AT_encoding
+        .byte   4                               # DW_AT_byte_size
+        .byte   0                               # End Of Children Mark
+.Ldebug_info_end0:
+        .section        .debug_str_offsets,"",@progbits
+        .long   32                              # Length of String Offsets Set
+        .short  5
+        .short  0
+.Lstr_offsets_base0:
+        .section        .debug_str,"MS",@progbits,1
+.Linfo_string0:
+        .asciz  "clang version 20.1.8 (CentOS 20.1.8-1.el9)" # string offset=0
+.Linfo_string1:
+        .asciz  "main.cpp"                      # string offset=43
+.Linfo_string2:
+        .asciz  "/home/gpastukhov/tmp2"         # string offset=52
+.Linfo_string3:
+        .asciz  "_Z3fooi"                       # string offset=74
+.Linfo_string4:
+        .asciz  "foo"                           # string offset=82
+.Linfo_string5:
+        .asciz  "int"                           # string offset=86
+.Linfo_string6:
+        .asciz  "main"                          # string offset=90
+        .section        .debug_str_offsets,"",@progbits
+        .long   .Linfo_string0
+        .long   .Linfo_string1
+        .long   .Linfo_string2
+        .long   .Linfo_string3
+        .long   .Linfo_string4
+        .long   .Linfo_string5
+        .long   .Linfo_string6
+        .section        .debug_addr,"",@progbits
+        .long   .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+        .short  5                               # DWARF version number
+        .byte   8                               # Address size
+        .byte   0                               # Segment selector size
+.Laddr_table_base0:
+        .quad   .Lfunc_begin0
+        .quad   .Ltmp1
+.Ldebug_addr_end0:
+        .ident  "clang version 20.1.8 (CentOS 20.1.8-1.el9)"
+        .section        ".note.GNU-stack","",@progbits
+        .addrsig
+        .section        .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/X86/hlt-terminator.s b/bolt/test/X86/hlt-terminator.s
new file mode 100644
index 0000000..3f67182
--- /dev/null
+++ b/bolt/test/X86/hlt-terminator.s
@@ -0,0 +1,24 @@
+## Check that HLT instruction is handled differently depending on the flags.
+## It's a terminator in the user-level code, but the execution can resume in
+## ring 0.
+
+# RUN: %clang %cflags %s -static -o %t.exe -nostdlib
+# RUN: llvm-bolt %t.exe --print-cfg --print-only=main --terminal-x86-hlt=0 \
+# RUN:   -o %t.ring0 2>&1 | FileCheck %s --check-prefix=CHECK-RING0
+# RUN: llvm-bolt %t.exe --print-cfg --print-only=main \
+# RUN:   -o %t.ring3 2>&1 | FileCheck %s --check-prefix=CHECK-RING3
+# RUN: llvm-objdump -d %t.ring0 --print-imm-hex | FileCheck %s --check-prefix=CHECK-BIN
+
+# CHECK-RING0: BB Count    : 1
+# CHECK-RING3: BB Count    : 2
+
+# CHECK-BIN: <main>:
+# CHECK-BIN-NEXT: f4                            hlt
+# CHECK-BIN-NEXT: c3                            retq
+
+.global main
+  .type main, %function
+main:
+        hlt
+        retq
+.size main, .-main
diff --git a/bolt/test/X86/jmp-optimization.test b/bolt/test/X86/jmp-optimization.test
index f969578..847c4822 100644
--- a/bolt/test/X86/jmp-optimization.test
+++ b/bolt/test/X86/jmp-optimization.test
@@ -1,10 +1,7 @@
 ## Tests the optimization of functions that just do a tail call in the beginning.
 
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. unsupported parameter expansion
-REQUIRES: shell
-
-RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
+RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt.cpp %S/Inputs/jmp_opt2.cpp \
+RUN:   %S/Inputs/jmp_opt3.cpp -o %t
 RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
 RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
 
diff --git a/bolt/test/X86/jump-table-ambiguous-unreachable.s b/bolt/test/X86/jump-table-ambiguous-unreachable.s
new file mode 100644
index 0000000..eb87b96
--- /dev/null
+++ b/bolt/test/X86/jump-table-ambiguous-unreachable.s
@@ -0,0 +1,87 @@
+## Check that llvm-bolt correctly updates ambiguous jump table entries that
+## can correspond either to __builtin_unreachable() or to a pointer to
+## the next function.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -no-pie -Wl,-q
+
+# RUN: llvm-bolt %t.exe --print-normalized --print-only=foo -o %t.out \
+# RUN:   2>&1 | FileCheck %s
+
+
+
+  .text
+  .globl _start
+  .type _start, %function
+_start:
+  .cfi_startproc
+  call foo
+  ret
+  .cfi_endproc
+  .size _start, .-_start
+
+  .globl foo
+  .type foo, %function
+foo:
+	.cfi_startproc
+.LBB00:
+          movq	0x8(%rdi), %rdi
+          movzbl	0x1(%rdi), %eax
+.LBB00_br:
+	        jmpq	*"JUMP_TABLE/foo.0"(,%rax,8)
+# CHECK:  jmpq {{.*}} # JUMPTABLE
+# CHECK-NEXT: Successors: {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}
+
+.Ltmp87085:
+	xorl	%eax, %eax
+	retq
+
+.Ltmp87086:
+	cmpb	$0x0, 0x8(%rdi)
+	setne	%al
+	retq
+
+.Ltmp87088:
+	movb	$0x1, %al
+	retq
+
+.Ltmp87087:
+	movzbl	0x14(%rdi), %eax
+	andb	$0x2, %al
+	shrb	%al
+	retq
+
+	.cfi_endproc
+.size foo, .-foo
+
+  .globl bar
+  .type bar, %function
+bar:
+	.cfi_startproc
+  ret
+	.cfi_endproc
+  .size bar, .-bar
+
+# Jump tables
+.section .rodata
+  .global jump_table
+jump_table:
+"JUMP_TABLE/foo.0":
+  .quad bar
+  .quad	.Ltmp87085
+  .quad bar
+  .quad	.Ltmp87086
+  .quad	.Ltmp87087
+  .quad	.LBB00
+  .quad	.Ltmp87088
+  .quad bar
+  .quad	.LBB00
+
+# CHECK: Jump table {{.*}} for function foo
+# CHECK-NEXT: 0x{{.*}} : bar
+# CHECK-NEXT: 0x{{.*}} :
+# CHECK-NEXT: 0x{{.*}} : bar
+# CHECK-NEXT: 0x{{.*}} :
+# CHECK-NEXT: 0x{{.*}} :
diff --git a/bolt/test/X86/jump-table-icp.test b/bolt/test/X86/jump-table-icp.test
index f147432..a095929 100644
--- a/bolt/test/X86/jump-table-icp.test
+++ b/bolt/test/X86/jump-table-icp.test
@@ -4,11 +4,7 @@ RUN: link_fdata %p/Inputs/jump_table_icp.s %t.o %t.fdata --nmtool llvm-nm
 RUN: llvm-strip --strip-unneeded %t.o
 RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
 
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. subshell execution
-REQUIRES: shell
-
-RUN: (llvm-bolt %t.exe --data %t.fdata -o %t --relocs \
+RUN: llvm-bolt %t.exe --data %t.fdata -o %t --relocs \
 RUN:   --reorder-blocks=cache --split-functions --split-all-cold \
 RUN:   --use-gnu-stack --dyno-stats --indirect-call-promotion=jump-tables \
 RUN:   --print-icp -v=0 \
@@ -16,8 +12,8 @@ RUN:   --enable-bat --print-cache-metrics \
 RUN:   --icp-jt-remaining-percent-threshold=10 \
 RUN:   --icp-jt-total-percent-threshold=2 \
 RUN:   --indirect-call-promotion-topn=1 \
-RUN:   --icp-jump-tables-targets --align-functions-max-bytes=7 2>&1 && \
-RUN:   llvm-objdump -d %t --print-imm-hex) | FileCheck %s
+RUN:   --icp-jump-tables-targets --align-functions-max-bytes=7 | FileCheck %s
+RUN: llvm-objdump -d %t --print-imm-hex | FileCheck --check-prefix CHECK-ASM %s
 
 BOLT-INFO: ICP total indirect callsites = 0
 BOLT-INFO: ICP total jump table callsites = 2
@@ -107,14 +103,14 @@ CHECK-NEXT:   Exec Count : 140
 CHECK:   Predecessors: .Ltmp{{.*}}, .LFT{{.*}}
 CHECK:   Successors: .Ltmp{{.*}} (mispreds: 0, count: 98)
 
-CHECK:     <_Z3inci>:
-CHECK:        	movq    0x{{.*}}(,%rax,8), %rax
-CHECK-NEXT:    cmpq    $0x{{.*}}, %rax
-CHECK-NEXT:    je {{.*}} <_Z3inci+0x{{.*}}>
-CHECK-NEXT:   	jmpq   *%rax
-
-CHECK:     <_Z7inc_dupi>:
-CHECK:        	movq    0x{{.*}}(,%rax,8), %rax
-CHECK-NEXT:    cmpq $0x{{.*}}, %rax
-CHECK-NEXT:    je {{.*}} <_Z7inc_dupi+0x{{.*}}>
-CHECK-NEXT:   	jmpq   *%rax
+CHECK-ASM:     <_Z3inci>:
+CHECK-ASM:        	movq    0x{{.*}}(,%rax,8), %rax
+CHECK-ASM-NEXT:    cmpq    $0x{{.*}}, %rax
+CHECK-ASM-NEXT:    je {{.*}} <_Z3inci+0x{{.*}}>
+CHECK-ASM-NEXT:   	jmpq   *%rax
+
+CHECK-ASM:     <_Z7inc_dupi>:
+CHECK-ASM:        	movq    0x{{.*}}(,%rax,8), %rax
+CHECK-ASM-NEXT:    cmpq $0x{{.*}}, %rax
+CHECK-ASM-NEXT:    je {{.*}} <_Z7inc_dupi+0x{{.*}}>
+CHECK-ASM-NEXT:   	jmpq   *%rax
diff --git a/bolt/test/X86/shrinkwrapping.test b/bolt/test/X86/shrinkwrapping.test
index 521b456..5470b5d 100644
--- a/bolt/test/X86/shrinkwrapping.test
+++ b/bolt/test/X86/shrinkwrapping.test
@@ -2,23 +2,21 @@
 ## shrink-wrapping when optimizing a function without
 ## frame pointers.
 
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. subshell execution to capture command output.
-REQUIRES: shell
-
 RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
 RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
 RUN:   --print-only=main --print-cfg \
 RUN:   --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
 RUN:   FileCheck %s --check-prefix=CHECK-BOLT
 
-RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -e \
-RUN:   `llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \
-RUN:    cut -f1 -d' ' | tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT
+RUN: llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \
+RUN:    cut -f1 -d' ' | tail -c9 > %t.input_address
+RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -f %t.input_address \
+RUN:   2>&1 | FileCheck %s --check-prefix=CHECK-INPUT
 
-RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
-RUN:   `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
-RUN:    tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
+RUN: llvm-nm --numeric-sort %t | grep main | tail -n 1 | \
+RUN:   cut -f1 -d' ' | tail -c9 > %t.output_address
+RUN: llvm-objdump --dwarf=frames %t | grep -A20 -f %t.output_address \
+RUN:   2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
 
 CHECK-BOLT: Extern Entry Count: 100
 CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s
index 3f982dd..74f2761 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s
@@ -31,7 +31,7 @@ resign_xpaci_good:
         xpaci   x16
         cmp     x0, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -46,7 +46,7 @@ resign_xpacd_good:
         xpacd   x16
         cmp     x0, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc473
 1:
         pacda   x0, x2
         ret
@@ -117,7 +117,7 @@ resign_xpaci_unrelated_auth_and_check:
         xpaci   x16
         cmp     x0, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x10, x2
         ret
@@ -139,7 +139,7 @@ resign_xpaci_wrong_pattern_1:
         xpaci   x16
         cmp     x0, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -157,7 +157,7 @@ resign_xpaci_wrong_pattern_2:
         xpaci   x0        // x0 instead of x16
         cmp     x0, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -174,7 +174,7 @@ resign_xpaci_wrong_pattern_3:
         xpaci   x16
         cmp     x16, x16  // x16 instead of x0
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -191,7 +191,7 @@ resign_xpaci_wrong_pattern_4:
         xpaci   x16
         cmp     x0, x0    // x0 instead of x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -208,7 +208,7 @@ resign_xpaci_wrong_pattern_5:
         mov     x16, x16  // replace xpaci with a no-op instruction
         cmp     x0, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -228,7 +228,7 @@ resign_xpaclri_good:
         xpaclri
         cmp     x30, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x30, x2
 
@@ -246,7 +246,7 @@ xpaclri_check_keeps_lr_safe:
         xpaclri         // clobbers LR
         cmp     x30, x16
         b.eq    1f
-        brk     0x1234    // marks LR as trusted and safe-to-dereference
+        brk     0xc471    // marks LR as trusted and safe-to-dereference
 1:
         ret             // not reporting non-protected return
         .size xpaclri_check_keeps_lr_safe, .-xpaclri_check_keeps_lr_safe
@@ -265,7 +265,7 @@ xpaclri_check_requires_safe_lr:
         xpaclri
         cmp     x30, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         ret
         .size xpaclri_check_requires_safe_lr, .-xpaclri_check_requires_safe_lr
@@ -283,7 +283,7 @@ resign_xpaclri_wrong_reg:
         xpaclri         // ... but xpaclri still operates on x30
         cmp     x20, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x20, x2
 
@@ -303,7 +303,7 @@ resign_checked_not_authenticated:
         xpaci   x16
         cmp     x0, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -323,7 +323,7 @@ resign_checked_before_authenticated:
         xpaci   x16
         cmp     x0, x16
         b.eq    1f
-        brk     0x1234
+        brk     0xc471
 1:
         autib   x0, x1
         pacia   x0, x2
@@ -339,7 +339,7 @@ resign_high_bits_tbz_good:
         autib   x0, x1
         eor     x16, x0, x0, lsl #1
         tbz     x16, #62, 1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -378,7 +378,7 @@ resign_high_bits_tbz_wrong_bit:
         autib   x0, x1
         eor     x16, x0, x0, lsl #1
         tbz     x16, #63, 1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -393,7 +393,7 @@ resign_high_bits_tbz_wrong_shift_amount:
         autib   x0, x1
         eor     x16, x0, x0, lsl #2
         tbz     x16, #62, 1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -408,7 +408,7 @@ resign_high_bits_tbz_wrong_shift_type:
         autib   x0, x1
         eor     x16, x0, x0, lsr #1
         tbz     x16, #62, 1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -423,7 +423,7 @@ resign_high_bits_tbz_wrong_pattern_1:
         autib   x0, x1
         eor     x16, x0, x0, lsl #1
         tbz     x17, #62, 1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -438,7 +438,7 @@ resign_high_bits_tbz_wrong_pattern_2:
         autib   x0, x1
         eor     x16, x10, x0, lsl #1
         tbz     x16, #62, 1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -453,7 +453,7 @@ resign_high_bits_tbz_wrong_pattern_3:
         autib   x0, x1
         eor     x16, x0, x10, lsl #1
         tbz     x16, #62, 1f
-        brk     0x1234
+        brk     0xc471
 1:
         pacia   x0, x2
         ret
@@ -648,7 +648,7 @@ many_checked_regs:
         xpacd   x16       // ...
         cmp     x2, x16   // ...
         b.eq    2f        // end of basic block
-        brk     0x1234
+        brk     0xc473
 2:
         pacdza  x0
         pacdza  x1
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s
index c314bc7..f44ba21 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s
@@ -79,7 +79,7 @@ good_explicit_check:
         autia   x0, x1
         eor     x16, x0, x0, lsl #1
         tbz     x16, #62, 1f
-        brk     0x1234
+        brk     0xc470
 1:
         ret
         .size good_explicit_check, .-good_explicit_check
@@ -373,7 +373,7 @@ good_explicit_check_multi_bb:
 1:
         eor     x16, x0, x0, lsl #1
         tbz     x16, #62, 2f
-        brk     0x1234
+        brk     0xc470
 2:
         cbz     x1, 3f
         nop
@@ -685,8 +685,7 @@ good_address_arith_nocfg:
         .globl  good_explicit_check_unrelated_reg
         .type   good_explicit_check_unrelated_reg,@function
 good_explicit_check_unrelated_reg:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function good_explicit_check_unrelated_reg, basic block {{[^,]+}}, at address
-        // FIXME: The below instruction is not an authentication oracle
+// CHECK-NOT: good_explicit_check_unrelated_reg
         autia   x2, x3    // One of possible execution paths after this instruction
                           // ends at BRK below, thus BRK used as a trap instruction
                           // should formally "check everything" not to introduce
@@ -694,7 +693,7 @@ good_explicit_check_unrelated_reg:
         autia   x0, x1
         eor     x16, x0, x0, lsl #1
         tbz     x16, #62, 1f
-        brk     0x1234
+        brk     0xc470
 1:
         ldr     x4, [x2]  // Right before this instruction X2 is checked - this
                           // should be propagated to the basic block ending with
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
index 3a4d383..4d4bb7b 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
@@ -57,7 +57,7 @@ good_sign_auted_checked_brk:
         autda   x0, x2
         eor     x16, x0, x0, lsl #1
         tbz     x16, #62, 1f
-        brk     0x1234
+        brk     0xc472
 1:
         pacda   x0, x1
         ret
@@ -351,7 +351,7 @@ good_sign_auted_checked_brk_multi_bb:
 1:
         eor     x16, x0, x0, lsl #1
         tbz     x16, #62, 2f
-        brk     0x1234
+        brk     0xc472
 2:
         cbz     x4, 3f
         nop
@@ -705,7 +705,7 @@ good_resign_with_increment_brk:
         add     x0, x0, #8
         eor     x16, x0, x0, lsl #1
         tbz     x16, #62, 1f
-        brk     0x1234
+        brk     0xc472
 1:
         mov     x2, x0
         pacda   x2, x1
diff --git a/bolt/test/binary-analysis/AArch64/trap-instructions.s b/bolt/test/binary-analysis/AArch64/trap-instructions.s
new file mode 100644
index 0000000..7810b2d
--- /dev/null
+++ b/bolt/test/binary-analysis/AArch64/trap-instructions.s
@@ -0,0 +1,213 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe -Wl,--emit-relocs
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test which instructions can be used to terminate the program abnormally
+// on a security violation.
+//
+// All test cases have the same structure:
+//
+//      cbz     x0, 1f    // [a], ensures [c] is never reported as unreachable
+//      autia   x2, x3
+//      cbz     x1, 2f    // [b]
+//      [instruction under test]
+// 1:
+//      ret               // [c]
+// 2:
+//      ldr     x0, [x2]
+//      ret
+//
+// This is to handle three possible cases: the instruction under test may be
+// considered by BOLT as
+// * trapping (and thus no-return): after being authenticated, x2 is either
+//   checked by LDR (if [b] is taken) or the program is terminated
+//   immediately without leaking x2 (if [b] falls through to the trapping
+//   instruction under test). Nothing is reported.
+// * non-trapping, but no-return (such as calling abort()): x2 is leaked if [b]
+//   falls through. Authentication oracle is reported.
+// * non-trapping and falling-through (i.e. a regular instruction):
+//   x2 is leaked by [c]. Authentication oracle is reported.
+
+        .text
+
+        .globl  brk_key_ia
+        .type   brk_key_ia,@function
+brk_key_ia:
+// CHECK-NOT: brk_key_ia
+        cbz     x0, 1f
+        autia   x2, x3
+        cbz     x1, 2f
+        brk     0xc470
+1:
+        ret
+2:
+        ldr     x0, [x2]
+        ret
+        .size   brk_key_ia, .-brk_key_ia
+
+        .globl  brk_key_ib
+        .type   brk_key_ib,@function
+brk_key_ib:
+// CHECK-NOT: brk_key_ib
+        cbz     x0, 1f
+        autia   x2, x3
+        cbz     x1, 2f
+        brk     0xc471
+1:
+        ret
+2:
+        ldr     x0, [x2]
+        ret
+        .size   brk_key_ib, .-brk_key_ib
+
+        .globl  brk_key_da
+        .type   brk_key_da,@function
+brk_key_da:
+// CHECK-NOT: brk_key_da
+        cbz     x0, 1f
+        autia   x2, x3
+        cbz     x1, 2f
+        brk     0xc472
+1:
+        ret
+2:
+        ldr     x0, [x2]
+        ret
+        .size   brk_key_da, .-brk_key_da
+
+        .globl  brk_key_db
+        .type   brk_key_db,@function
+brk_key_db:
+// CHECK-NOT: brk_key_db
+        cbz     x0, 1f
+        autia   x2, x3
+        cbz     x1, 2f
+        brk     0xc473
+1:
+        ret
+2:
+        ldr     x0, [x2]
+        ret
+        .size   brk_key_db, .-brk_key_db
+
+// The immediate operand of BRK instruction may indicate whether the instruction
+// is intended to be a non-recoverable trap: for example, for this code
+//
+//     int test_trap(void) {
+//       __builtin_trap();
+//       return 42;
+//     }
+//     int test_debugtrap(void) {
+//       __builtin_debugtrap();
+//       return 42;
+//     }
+//
+// Clang produces the following assembly:
+//
+//     test_trap:
+//             brk     #0x1
+//     test_debugtrap:
+//             brk     #0xf000
+//             mov     w0, #42
+//             ret
+//
+// In GCC, __builtin_trap() uses "brk 0x3e8" (i.e. decimal 1000) and
+// __builtin_debugtrap() is not supported.
+//
+// At the time of writing these test cases, any BRK instruction is considered
+// no-return by BOLT, thus it ends its basic block and prevents falling through
+// to the next BB.
+// FIXME: Make BOLT handle __builtin_debugtrap() properly from the CFG point
+//        of view.
+
+        .globl  brk_gcc_builtin_trap
+        .type   brk_gcc_builtin_trap,@function
+brk_gcc_builtin_trap:
+// CHECK-NOT: brk_gcc_builtin_trap
+        cbz     x0, 1f
+        autia   x2, x3
+        cbz     x1, 2f
+        brk     0x3e8     // __builtin_trap()
+1:
+        ret
+2:
+        ldr     x0, [x2]
+        ret
+        .size   brk_gcc_builtin_trap, .-brk_gcc_builtin_trap
+
+        .globl  brk_clang_builtin_trap
+        .type   brk_clang_builtin_trap,@function
+brk_clang_builtin_trap:
+// CHECK-NOT: brk_clang_builtin_trap
+        cbz     x0, 1f
+        autia   x2, x3
+        cbz     x1, 2f
+        brk     0x1       // __builtin_trap()
+1:
+        ret
+2:
+        ldr     x0, [x2]
+        ret
+        .size   brk_clang_builtin_trap, .-brk_clang_builtin_trap
+
+        .globl  brk_clang_builtin_debugtrap
+        .type   brk_clang_builtin_debugtrap,@function
+brk_clang_builtin_debugtrap:
+// CHECK-LABEL: GS-PAUTH: authentication oracle found in function brk_clang_builtin_debugtrap, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autia   x2, x3
+// CHECK-NEXT:  The 0 instructions that leak the affected registers are:
+        cbz     x0, 1f
+        autia   x2, x3
+        cbz     x1, 2f
+        brk     0xf000    // __builtin_debugtrap()
+1:
+        ret
+2:
+        ldr     x0, [x2]
+        ret
+        .size   brk_clang_builtin_debugtrap, .-brk_clang_builtin_debugtrap
+
+// Conservatively treat BRK with an unknown immediate operand as not suitable
+// for terminating the program on security violation.
+        .globl  brk_unknown_imm
+        .type   brk_unknown_imm,@function
+brk_unknown_imm:
+// CHECK-LABEL: GS-PAUTH: authentication oracle found in function brk_unknown_imm, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autia   x2, x3
+// CHECK-NEXT:  The 0 instructions that leak the affected registers are:
+        cbz     x0, 1f
+        autia   x2, x3
+        cbz     x1, 2f
+        brk     0x3572
+1:
+        ret
+2:
+        ldr     x0, [x2]
+        ret
+        .size   brk_unknown_imm, .-brk_unknown_imm
+
+// Conservatively assume that calling the abort() function may be an unsafe way
+// to terminate the program, as some number of instructions would still be
+// executed after the program state has already been tampered with.
+        .globl  call_abort_fn
+        .type   call_abort_fn,@function
+call_abort_fn:
+// CHECK-LABEL: GS-PAUTH: authentication oracle found in function call_abort_fn, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autia   x2, x3
+// CHECK-NEXT:  The 0 instructions that leak the affected registers are:
+        cbz     x0, 1f
+        autia   x2, x3
+        cbz     x1, 2f
+        b       abort    // a no-return tail call to abort()
+1:
+        ret
+2:
+        ldr     x0, [x2]
+        ret
+        .size   call_abort_fn, .-call_abort_fn
+
+        .globl  main
+        .type   main,@function
+main:
+        mov     x0, 0
+        ret
+        .size   main, .-main
diff --git a/bolt/test/dump-dot-func.test b/bolt/test/dump-dot-func.test
new file mode 100644
index 0000000..f05bfc1
--- /dev/null
+++ b/bolt/test/dump-dot-func.test
@@ -0,0 +1,52 @@
+# Test the --dump-dot-func option with multiple functions
+# (includes tests for both mangled/unmangled names)
+
+RUN: %clangxx %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q
+
+# Test 1: --dump-dot-func with specific function name (mangled)
+RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=_Z3addii -v=1 2>&1 | FileCheck %s --check-prefix=ADD
+
+# Test 2: --dump-dot-func with regex pattern (main.*)
+RUN: llvm-bolt %t.exe -o %t.bolt2 --dump-dot-func="main.*" -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-REGEX
+
+# Test 3: --dump-dot-func with multiple specific functions (mangled names)
+RUN: llvm-bolt %t.exe -o %t.bolt3 --dump-dot-func=_Z3addii,_Z8multiplyii -v=1 2>&1 | FileCheck %s --check-prefix=MULTI
+
+# Test 4: No option specified should create no dot files
+RUN: llvm-bolt %t.exe -o %t.bolt4 2>&1 | FileCheck %s --check-prefix=NONE
+
+# Test 5: --dump-dot-func with non-existent function
+RUN: llvm-bolt %t.exe -o %t.bolt5 --dump-dot-func=nonexistent -v=1 2>&1 | FileCheck %s --check-prefix=NONEXISTENT
+
+# Test 6: Backward compatibility - --dump-dot-all should still work
+RUN: llvm-bolt %t.exe -o %t.bolt6 --dump-dot-all -v=1 2>&1 | FileCheck %s --check-prefix=ALL
+
+# Test 7: Test with unmangled function name (main function)
+RUN: llvm-bolt %t.exe -o %t.bolt7 --dump-dot-func=main -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-UNMANGLED
+
+# Check that specific functions are dumped
+ADD: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
+ADD-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+ADD-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
+ADD-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot
+
+MAIN-REGEX-DAG: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
+MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
+
+MULTI-DAG: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
+MULTI-DAG: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
+MULTI-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+MULTI-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot
+
+# Should be no dumping messages when no option is specified
+NONE-NOT: BOLT-INFO: dumping CFG
+
+# Should be no dumping messages for non-existent function
+NONEXISTENT-NOT: BOLT-INFO: dumping CFG
+
+ALL: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+
+MAIN-UNMANGLED: BOLT-INFO: dumping CFG to main-00_build-cfg.dot
+MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot
+MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
+\ No newline at end of file
diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py
index 898dce8..42aed64 100755
--- a/bolt/test/link_fdata.py
+++ b/bolt/test/link_fdata.py
@@ -9,6 +9,7 @@ respective anchor symbols, and prints the resulting file to stdout.
 
 import argparse
 import os
+import platform
 import shutil
 import subprocess
 import sys
@@ -19,7 +20,11 @@ parser.add_argument("input")
 parser.add_argument("objfile", help="Object file to extract symbol values from")
 parser.add_argument("output")
 parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
-parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
+parser.add_argument(
+    "--nmtool",
+    default="llvm-nm" if platform.system() == "Windows" else "nm",
+    help="Path to nm tool",
+)
 parser.add_argument("--no-lbr", action="store_true")
 parser.add_argument("--no-redefine", action="store_true")
 
@@ -27,7 +32,7 @@ args = parser.parse_args()
 
 # Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated
 # profile data
-prefix_pat = re.compile(f"^# {args.prefix}: (.*)")
+prefix_pat = re.compile(f"^(#|//) {args.prefix}: (.*)")
 
 # FDATA records:
 # <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
@@ -56,7 +61,7 @@ with open(args.input, "r") as f:
         prefix_match = prefix_pat.match(line)
         if not prefix_match:
             continue
-        profile_line = prefix_match.group(1)
+        profile_line = prefix_match.group(2)
         fdata_match = fdata_pat.match(profile_line)
         preagg_match = preagg_pat.match(profile_line)
         nolbr_match = nolbr_pat.match(profile_line)
@@ -86,7 +91,10 @@ with open(args.input, "r") as f:
             exit("ERROR: unexpected input:\n%s" % line)
 
 # Read nm output: <symbol value> <symbol type> <symbol name>
-is_llvm_nm = os.path.basename(os.path.realpath(shutil.which(args.nmtool))) == "llvm-nm"
+# Ignore .exe on Windows host.
+is_llvm_nm = os.path.basename(os.path.realpath(shutil.which(args.nmtool))).startswith(
+    "llvm-nm"
+)
 nm_output = subprocess.run(
     [
         args.nmtool,
diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py
index 0d05229..bef570b 100644
--- a/bolt/test/lit.cfg.py
+++ b/bolt/test/lit.cfg.py
@@ -18,11 +18,22 @@ from lit.llvm.subst import FindTool
 # name: The name of this test suite.
 config.name = "BOLT"
 
+# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites.
+# See https://github.com/llvm/llvm-project/issues/106636 for more details.
+#
+# We prefer the lit internal shell which provides a better user experience on failures
+# and is faster unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0
+# env var.
+use_lit_shell = True
+lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if lit_shell_env:
+    use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
+
 # testFormat: The test format to use to interpret tests.
 #
 # For now we require '&&' between commands, until they get globally killed and
 # the test runner updated.
-config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
+config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
 
 # suffixes: A list of file extensions to treat as test files.
 config.suffixes = [
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 434d4d2..08b3413 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -2,7 +2,7 @@
 
 REQUIRES: system-linux, perf
 
-RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t
+RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -pie -Wl,--script=%S/Inputs/perf_test.lds -o %t
 RUN: perf record -Fmax -e cycles:u -o %t2 -- %t
 RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --show-density \
 RUN:   --heatmap %t.hm 2>&1 | FileCheck %s
diff --git a/bolt/test/permission.test b/bolt/test/permission.test
index f495e87..ecb51fc 100644
--- a/bolt/test/permission.test
+++ b/bolt/test/permission.test
@@ -1,13 +1,28 @@
 # Ensure that the permissions of the optimized binary file comply with the
 # system's umask.
 
-# This test performs a logical AND operation on the results of the `stat -c %a
-# %t.bolt` and `umask` commands (both results are displayed in octal), and
-# checks whether the result is equal to 0.
-REQUIRES: shell, system-linux
+# This test uses umask, which is Linux specific.
+REQUIRES: system-linux
 
-RUN: %clang %cflags %p/Inputs/hello.c -o %t -Wl,-q
-RUN: llvm-bolt %t -o %t.bolt
-RUN: echo $(( 8#$(stat -c %a %t.bolt) & 8#$(umask) )) | FileCheck %s
+# RUN: rm -f %t
+# RUN: touch %t
+# RUN: chmod 0755 %t
+# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0755
+# RUN: chmod 0600 %t
+# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0600
+# RUN: chmod 0655 %t
+# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0655
 
-CHECK: 0
+RUN: %clang %cflags %p/Inputs/hello.c -o %t.exe -Wl,-q
+
+RUN: umask 0022
+RUN: llvm-bolt %t.exe -o %t1
+RUN: ls -l %t1 | cut -f 1 -d ' ' | cmp - %t.0755
+
+RUN: umask 0177
+RUN: llvm-bolt %t.exe -o %t2
+RUN: ls -l %t2 | cut -f 1 -d ' ' | cmp - %t.0600
+
+RUN: umask 0122
+RUN: llvm-bolt %t.exe -o %t3
+RUN: ls -l %t3 | cut -f 1 -d ' ' | cmp - %t.0655
diff --git a/bolt/test/runtime/X86/tail-duplication-constant-prop.s b/bolt/test/runtime/X86/tail-duplication-constant-prop.s
index 863c6ff..c28c2f4 100644
--- a/bolt/test/runtime/X86/tail-duplication-constant-prop.s
+++ b/bolt/test/runtime/X86/tail-duplication-constant-prop.s
@@ -8,8 +8,8 @@
 # RUN:    --print-finalized \
 # RUN:    --tail-duplication=moderate --tail-duplication-minimum-offset=1 \
 # RUN:    --tail-duplication-const-copy-propagation=1 -o %t.out | FileCheck %s
-# RUN: %t.exe; echo $?
-# RUN: %t.out; echo $?
+# RUN: not %t.exe
+# RUN: not %t.out
 
 # FDATA: 1 main 14 1 main #.BB2# 0 10
 # FDATA: 1 main 16 1 main #.BB2# 0 20
diff --git a/bolt/test/runtime/copy_file.py b/bolt/test/runtime/copy_file.py
new file mode 100644
index 0000000..14db9d0
--- /dev/null
+++ b/bolt/test/runtime/copy_file.py
@@ -0,0 +1,15 @@
+import sys
+import shutil
+
+with open(sys.argv[1] + ".output") as log_file:  # argv[1]: shared path prefix
+    lines = log_file.readlines()
+    for line in lines:
+        if line.startswith(sys.argv[2]):  # argv[2]: prefix of the line to find
+            pid = line.split(" ")[1].strip()  # second space-separated field
+            shutil.copy(
+                sys.argv[1] + "." + pid + ".fdata",  # source: <prefix>.<pid>.fdata
+                sys.argv[1] + "." + sys.argv[3] + ".fdata",  # destination name
+            )
+            sys.exit(0)  # stop at the first matching line
+
+sys.exit(1)  # no line in the log started with argv[2]
diff --git a/bolt/test/runtime/instrumentation-indirect-2.c b/bolt/test/runtime/instrumentation-indirect-2.c
index 7d19db1..4883d9b 100644
--- a/bolt/test/runtime/instrumentation-indirect-2.c
+++ b/bolt/test/runtime/instrumentation-indirect-2.c
@@ -50,7 +50,7 @@ int main() {
   return 0;
 }
 /*
-REQUIRES: system-linux,shell,fuser
+REQUIRES: system-linux,fuser
 
 RUN: %clang %cflags %s -o %t.exe -Wl,-q -pie -fpie
 
@@ -61,10 +61,14 @@ RUN: --instrumentation-wait-forks
 
 # Instrumented program needs to finish returning zero
 # Both output and profile must contain all 16 functions
-RUN: %t.instrumented_conservative > %t.output
-# Wait for profile and output to be fully written
-RUN: bash %S/wait_file.sh %t.output
-RUN: bash %S/wait_file.sh %t.fdata
+# We need to use bash to invoke this as otherwise we hang inside a
+# popen.communicate call in lit's internal shell. Eventually we should not
+# need this.
+# TODO(boomanaiden154): Remove once
+# https://github.com/llvm/llvm-project/issues/156484 is fixed.
+RUN: bash -c "%t.instrumented_conservative; wait" > %t.output
+# We can just read because we ensure the profile will be fully written by
+# calling wait inside the bash invocation.
 RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT
 RUN: cat %t.fdata | FileCheck %s --check-prefix=CHECK-COMMON-PROF
 
@@ -112,14 +116,8 @@ RUN: bash %S/wait_file.sh %t.output
 # Make sure all functions were called
 RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT
 
-RUN: child_pid=$(cat %t.output | grep funcA | awk '{print $2;}')
-RUN: par_pid=$(cat %t.output | grep funcB | awk '{print $2;}')
-
-RUN: bash %S/wait_file.sh %t.$child_pid.fdata
-RUN: bash %S/wait_file.sh %t.$par_pid.fdata
-
-RUN: mv %t.$child_pid.fdata %t.child.fdata
-RUN: mv %t.$par_pid.fdata %t.parent.fdata
+RUN: %python %S/copy_file.py %t funcA child
+RUN: %python %S/copy_file.py %t funcB parent
 
 # Instrumented binary must produce two profiles with only local calls
 # recorded. Functions called only in child should not appear in parent's
diff --git a/bolt/test/timers.c b/bolt/test/timers.c
index a34958a..59bd0d5 100644
--- a/bolt/test/timers.c
+++ b/bolt/test/timers.c
@@ -1,22 +1,21 @@
-/* This test checks timers for metadata manager phases.
-# RUN: %clang %cflags %s -o %t.exe
-# RUN: link_fdata %s %t.exe %t.fdata
-# RUN: llvm-bolt %t.exe -o %t.null --data %t.fdata -w %t.yaml --time-rewrite \
-# RUN:   2>&1 | FileCheck %s
-# RUN: link_fdata %s %t.exe %t.preagg PREAGG
-# RUN: perf2bolt %t.exe -o %t.null -p %t.preagg --pa --time-rewrite \
-# RUN:   2>&1 | FileCheck %s --check-prefix=CHECK-P2B
+// This test checks timers for metadata manager phases.
+// RUN: %clang %cflags %s -o %t.exe
+// RUN: link_fdata %s %t.exe %t.fdata
+// RUN: llvm-bolt %t.exe -o %t.null --data %t.fdata -w %t.yaml --time-rewrite \
+// RUN:   2>&1 | FileCheck %s
+// RUN: link_fdata %s %t.exe %t.preagg PREAGG
+// RUN: perf2bolt %t.exe -o %t.null -p %t.preagg --pa --time-rewrite \
+// RUN:   2>&1 | FileCheck %s --check-prefix=CHECK-P2B
 
-# CHECK-DAG: update metadata post-emit
-# CHECK-DAG: process section metadata
-# CHECK-DAG: process metadata pre-CFG
-# CHECK-DAG: process metadata post-CFG
-# CHECK-DAG: finalize metadata pre-emit
+// CHECK-DAG: update metadata post-emit
+// CHECK-DAG: process section metadata
+// CHECK-DAG: process metadata pre-CFG
+// CHECK-DAG: process metadata post-CFG
+// CHECK-DAG: finalize metadata pre-emit
 
-# CHECK-P2B-DAG: process section metadata
-# CHECK-P2B-DAG: process metadata pre-CFG
+// CHECK-P2B-DAG: process section metadata
+// CHECK-P2B-DAG: process metadata pre-CFG
 
-# FDATA: 0 [unknown] 0 1 main 0 1 0
-# PREAGG: B X:0 #main# 1 0
-*/
+// FDATA: 0 [unknown] 0 1 main 0 1 0
+// PREAGG: B X:0 #main# 1 0
 int main() { return 0; }