diff options
Diffstat (limited to 'bolt/test')
26 files changed, 1037 insertions, 122 deletions
diff --git a/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s new file mode 100644 index 0000000..3bcbcbb --- /dev/null +++ b/bolt/test/AArch64/data-marker-invalidates-extra-entrypoint.s @@ -0,0 +1,38 @@ +# This test is to ensure that we query data marker symbols to avoid +# misidentifying constant data island symbol as extra entry point. + +# RUN: %clang %cflags %s -o %t.so -Wl,-q -Wl,--init=_bar -Wl,--fini=_bar +# RUN: llvm-bolt %t.so -o %t.instr.so + + .text + .global _start + .type _start, %function +_start: + ret + + .text + .global _foo + .type _foo, %function +_foo: + cbz x1, _foo_2 +_foo_1: + add x1, x2, x0 + b _foo +_foo_2: + ret + +# None of these constant island symbols should be identified as extra entry +# point for function `_foo'. + .align 4 +_const1: .short 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80 +_const2: .short 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0 +_const3: .short 0x04, 0x08, 0x0c, 0x20, 0x60, 0x80, 0xa0, 0xc0 + + .text + .global _bar + .type _bar, %function +_bar: + ret + + # Dummy relocation to force relocation mode + .reloc 0, R_AARCH64_NONE diff --git a/bolt/test/AArch64/unsupported-passes.test b/bolt/test/AArch64/unsupported-passes.test new file mode 100644 index 0000000..886fc1c --- /dev/null +++ b/bolt/test/AArch64/unsupported-passes.test @@ -0,0 +1,8 @@ +// Checks that non-fully supported passes on AArch64 are handled appropriately. 
+ +// REQUIRES: system-linux,asserts,target=aarch64{{.*}} + +RUN: %clang %cflags %p/../Inputs/hello.c -o %t -Wl,-q +RUN: not llvm-bolt %t -o %t.bolt --frame-opt=all 2>&1 | FileCheck %s + +CHECK: BOLT-ERROR: frame-optimizer is supported only on X86 diff --git a/bolt/test/AArch64/validate-secondary-entry-point.s b/bolt/test/AArch64/validate-secondary-entry-point.s index 0099a0e..3ad6946 100644 --- a/bolt/test/AArch64/validate-secondary-entry-point.s +++ b/bolt/test/AArch64/validate-secondary-entry-point.s @@ -1,13 +1,23 @@ # This test is to verify that BOLT won't take a label pointing to constant -# island as a secondary entry point (function `_start` doesn't have ELF size -# set originally) and the function won't otherwise be mistaken as non-simple. +# island as a secondary entry point. This could happen when function doesn't +# have ELF size set if it is from assembly code, or a constant island is +# referenced by another function discovered during relocation processing. -# RUN: %clang %cflags -pie %s -o %t.so -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo +# RUN: split-file %s %t + +# RUN: %clang %cflags -pie %t/tt.asm -o %t.so \ +# RUN: -Wl,-q -Wl,--init=_foo -Wl,--fini=_foo # RUN: llvm-bolt %t.so -o %t.bolt.so --print-cfg 2>&1 | FileCheck %s # CHECK-NOT: BOLT-WARNING: reference in the middle of instruction detected \ # CHECK-NOT: function _start at offset 0x{{[0-9a-f]+}} # CHECK: Binary Function "_start" after building cfg +# RUN: %clang %cflags -ffunction-sections -shared %t/tt.c %t/ss.c -o %tt.so \ +# RUN: -Wl,-q -Wl,--init=_start -Wl,--fini=_start \ +# RUN: -Wl,--version-script=%t/linker_script +# RUN: llvm-bolt %tt.so -o %tt.bolted.so + +;--- tt.asm .text .global _foo @@ -32,3 +42,31 @@ _bar: # Dummy relocation to force relocation mode .reloc 0, R_AARCH64_NONE + +;--- tt.c +void _start() {} + +__attribute__((naked)) void foo() { + asm("ldr x16, .L_fnptr\n" + "blr x16\n" + "ret\n" + + "_rodatx:" + ".global _rodatx;" + ".quad 0;" + ".L_fnptr:" + ".quad 0;"); +} + 
+;--- ss.c +__attribute__((visibility("hidden"))) extern void* _rodatx; +void* bar() { return &_rodatx; } + +;--- linker_script +{ +global: + _start; + foo; + bar; +local: *; +}; diff --git a/bolt/test/AArch64/veneer-lld-abs.s b/bolt/test/AArch64/veneer-lld-abs.s index b22301d..77d6f0ce2 100644 --- a/bolt/test/AArch64/veneer-lld-abs.s +++ b/bolt/test/AArch64/veneer-lld-abs.s @@ -12,7 +12,7 @@ ## Occasionally, we see the linker not generating $d symbols for long veneers ## causing BOLT to fail veneer elimination. -# RUN: llvm-objcopy --remove-symbol-prefix=\$d %t.exe %t.no-marker.exe +# RUN: llvm-objcopy --remove-symbol-prefix='$d' %t.exe %t.no-marker.exe # RUN: llvm-bolt %t.no-marker.exe -o %t.no-marker.bolt \ # RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT # RUN: llvm-objdump -d -j .text %t.no-marker.bolt | \ diff --git a/bolt/test/Inputs/multi-func.cpp b/bolt/test/Inputs/multi-func.cpp new file mode 100644 index 0000000..61c968f --- /dev/null +++ b/bolt/test/Inputs/multi-func.cpp @@ -0,0 +1,24 @@ +#include <iostream> + +// Multiple functions to test selective dumping +int add(int a, int b) { return a + b; } + +int multiply(int a, int b) { return a * b; } + +int main_helper() { + std::cout << "Helper function" << std::endl; + return 42; +} + +int main_secondary() { return add(5, 3); } + +void other_function() { std::cout << "Other function" << std::endl; } + +int main() { + int result = add(10, 20); + result = multiply(result, 2); + main_helper(); + main_secondary(); + other_function(); + return result; +} diff --git a/bolt/test/X86/double-jump.test b/bolt/test/X86/double-jump.test index 424747c..94b1578 100644 --- a/bolt/test/X86/double-jump.test +++ b/bolt/test/X86/double-jump.test @@ -1,15 +1,11 @@ ## Test the double jump removal peephole. -## This test has commands that rely on shell capabilities that won't execute -## correctly on Windows e.g. 
subshell execution -REQUIRES: shell - RUN: %clangxx %cxxflags %p/Inputs/double_jump.cpp -o %t.exe -RUN: (llvm-bolt %t.exe --peepholes=double-jumps \ -RUN: --eliminate-unreachable -o %t 2>&1 \ -RUN: && llvm-objdump -d %t --print-imm-hex --no-show-raw-insn) | FileCheck %s +RUN: llvm-bolt %t.exe --peepholes=double-jumps \ +RUN: --eliminate-unreachable -o %t | FileCheck --check-prefix CHECK-BOLT %s +RUN: llvm-objdump -d %t --print-imm-hex --no-show-raw-insn | FileCheck %s -CHECK: BOLT-INFO: Peephole: 1 double jumps patched. +CHECK-BOLT: BOLT-INFO: Peephole: 1 double jumps patched. CHECK: <_Z3foom>: CHECK-NEXT: pushq %rbp diff --git a/bolt/test/X86/dwarf5-debug-line-print.s b/bolt/test/X86/dwarf5-debug-line-print.s new file mode 100644 index 0000000..b0a5bab --- /dev/null +++ b/bolt/test/X86/dwarf5-debug-line-print.s @@ -0,0 +1,148 @@ +# REQUIRES: system-linux + +## Check that BOLT correctly prints debug line comments for DWARF-5. + + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t1.o +# RUN: %clang %cflags -dwarf-5 %t1.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \ +# RUN: --print-after-lowering -o %t.bolt | FileCheck %s + +# CHECK: xorq %rdi, %rdi # debug line main.c:2:5 + +# __attribute__((naked)) void _start() { +# __asm__( +# "xor %rdi, %rdi\n" // exit code 0 +# "mov $60, %rax\n" // syscall number for exit +# "syscall\n" +# ); +# } + + .file "main.c" + .text + .globl _start # -- Begin function _start + .p2align 4 + .type _start,@function +_start: # @_start +.Lfunc_begin0: + .file 0 "/home/gpastukhov/tmp2" "main.c" md5 0x94c0e54a615c2a21415ddb904991abd8 + .cfi_startproc +# %bb.0: + .loc 0 2 5 prologue_end # main.c:2:5 + #APP + xorq %rdi, %rdi + movq $60, %rax + syscall + + #NO_APP +.Ltmp0: +.Lfunc_end0: + .size _start, .Lfunc_end0-_start + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # 
DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x23 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 29 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0xb DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + .byte 3 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 20 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.1.8 (CentOS 20.1.8-1.el9)" # string offset=0 +.Linfo_string1: + .asciz "main.c" # string offset=43 +.Linfo_string2: + .asciz "/home/gpastukhov/tmp2" # string offset=50 +.Linfo_string3: + .asciz "_start" # string offset=72 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 +.Ldebug_addr_end0: + .ident "clang version 20.1.8 (CentOS 20.1.8-1.el9)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/dwarf5-two-cus.s b/bolt/test/X86/dwarf5-two-cus.s new file mode 100644 index 0000000..8b5afb4 --- /dev/null +++ b/bolt/test/X86/dwarf5-two-cus.s @@ -0,0 +1,251 @@ +## Check that BOLT correctly handles two CUs with DWARF-5 debug info (does not crash), 
when +## a function from one CU is forced to be inlined into another. + +# REQUIRES: system-linux + +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %s -o %t-main.o +# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5_helper.s -o %thelper.o +# RUN: %clang %cflags -gdwarf-5 -Wl,-q %t-main.o %thelper.o -o %t.exe +# RUN: llvm-bolt %t.exe --update-debug-sections --force-inline=_Z3fooi \ +# RUN: -o %t.bolt | FileCheck %s + +# CHECK-NOT: BOLT-ERROR +# CHECK-NOT: BOLT-WARNING +# CHECK: BOLT-INFO: inlined {{[0-9]+}} calls at {{[1-9][0-9]*}} call sites + +# extern int foo(int); +# int main(){ +# foo(10); +# return 0; +# } + .file "main.cpp" + .text + .globl main # -- Begin function main + .p2align 4 + .type main,@function +main: # @main +.Lfunc_begin0: + .file 0 "/home/gpastukhov/tmp2" "main.cpp" md5 0x5c930f5d3a068b09fd18ece59c58bdcf + .loc 0 2 0 # main.cpp:2:0 + .cfi_startproc +# %bb.0: + pushq %rax + .cfi_def_cfa_offset 16 +.Ltmp0: + .loc 0 3 5 prologue_end # main.cpp:3:5 + movl $10, %edi + callq _Z3fooi +.Ltmp1: + .loc 0 4 5 # main.cpp:4:5 + xorl %eax, %eax + .loc 0 4 5 epilogue_begin is_stmt 0 # main.cpp:4:5 + popq %rcx + .cfi_def_cfa_offset 8 + retq +.Ltmp2: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) 
+ .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 122 # DW_AT_call_all_calls + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 72 # DW_TAG_call_site + .byte 1 # DW_CHILDREN_yes + .byte 127 # DW_AT_call_origin + .byte 19 # DW_FORM_ref4 + .byte 125 # DW_AT_call_return_pc + .byte 27 # DW_FORM_addrx + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 73 # DW_TAG_call_site_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 126 # DW_AT_call_value + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 37 # DW_FORM_strx1 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 62 # 
DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xc:0x47 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 33 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0x1c DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 87 + # DW_AT_call_all_calls + .byte 6 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 2 # DW_AT_decl_line + .long 78 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x32:0xc DW_TAG_call_site + .long 63 # DW_AT_call_origin + .byte 1 # DW_AT_call_return_pc + .byte 4 # Abbrev [4] 0x38:0x5 DW_TAG_call_site_parameter + .byte 1 # DW_AT_location + .byte 85 + .byte 1 # DW_AT_call_value + .byte 58 + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 5 # Abbrev [5] 0x3f:0xf DW_TAG_subprogram + .byte 3 # DW_AT_linkage_name + .byte 4 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 78 # DW_AT_type + # DW_AT_declaration + # DW_AT_external + .byte 6 # Abbrev [6] 0x48:0x5 DW_TAG_formal_parameter + .long 78 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 7 # Abbrev [7] 0x4e:0x4 DW_TAG_base_type + .byte 5 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section 
.debug_str_offsets,"",@progbits + .long 32 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 20.1.8 (CentOS 20.1.8-1.el9)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=43 +.Linfo_string2: + .asciz "/home/gpastukhov/tmp2" # string offset=52 +.Linfo_string3: + .asciz "_Z3fooi" # string offset=74 +.Linfo_string4: + .asciz "foo" # string offset=82 +.Linfo_string5: + .asciz "int" # string offset=86 +.Linfo_string6: + .asciz "main" # string offset=90 + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .long .Linfo_string5 + .long .Linfo_string6 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Ltmp1 +.Ldebug_addr_end0: + .ident "clang version 20.1.8 (CentOS 20.1.8-1.el9)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/X86/hlt-terminator.s b/bolt/test/X86/hlt-terminator.s new file mode 100644 index 0000000..3f67182 --- /dev/null +++ b/bolt/test/X86/hlt-terminator.s @@ -0,0 +1,24 @@ +## Check that HLT instruction is handled differently depending on the flags. +## It's a terminator in the user-level code, but the execution can resume in +## ring 0. 
+ +# RUN: %clang %cflags %s -static -o %t.exe -nostdlib +# RUN: llvm-bolt %t.exe --print-cfg --print-only=main --terminal-x86-hlt=0 \ +# RUN: -o %t.ring0 2>&1 | FileCheck %s --check-prefix=CHECK-RING0 +# RUN: llvm-bolt %t.exe --print-cfg --print-only=main \ +# RUN: -o %t.ring3 2>&1 | FileCheck %s --check-prefix=CHECK-RING3 +# RUN: llvm-objdump -d %t.ring0 --print-imm-hex | FileCheck %s --check-prefix=CHECK-BIN + +# CHECK-RING0: BB Count : 1 +# CHECK-RING3: BB Count : 2 + +# CHECK-BIN: <main>: +# CHECK-BIN-NEXT: f4 hlt +# CHECK-BIN-NEXT: c3 retq + +.global main + .type main, %function +main: + hlt + retq +.size main, .-main diff --git a/bolt/test/X86/jmp-optimization.test b/bolt/test/X86/jmp-optimization.test index f969578..847c4822 100644 --- a/bolt/test/X86/jmp-optimization.test +++ b/bolt/test/X86/jmp-optimization.test @@ -1,10 +1,7 @@ ## Tests the optimization of functions that just do a tail call in the beginning. -## This test has commands that rely on shell capabilities that won't execute -## correctly on Windows e.g. unsupported parameter expansion -REQUIRES: shell - -RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t +RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt.cpp %S/Inputs/jmp_opt2.cpp \ +RUN: %S/Inputs/jmp_opt3.cpp -o %t RUN: llvm-bolt -inline-small-functions %t -o %t.bolt RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s diff --git a/bolt/test/X86/jump-table-ambiguous-unreachable.s b/bolt/test/X86/jump-table-ambiguous-unreachable.s new file mode 100644 index 0000000..eb87b96 --- /dev/null +++ b/bolt/test/X86/jump-table-ambiguous-unreachable.s @@ -0,0 +1,87 @@ +## Check that llvm-bolt correctly updates ambiguous jump table entries that +## can correspond to either builtin_unreachable() or could be a pointer to +## the next function. 
+ +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -no-pie -Wl,-q + +# RUN: llvm-bolt %t.exe --print-normalized --print-only=foo -o %t.out \ +# RUN: 2>&1 | FileCheck %s + + + + .text + .globl _start + .type _start, %function +_start: + .cfi_startproc + call foo + ret + .cfi_endproc + .size _start, .-_start + + .globl foo + .type foo, %function +foo: + .cfi_startproc +.LBB00: + movq 0x8(%rdi), %rdi + movzbl 0x1(%rdi), %eax +.LBB00_br: + jmpq *"JUMP_TABLE/foo.0"(,%rax,8) +# CHECK: jmpq {{.*}} # JUMPTABLE +# CHECK-NEXT: Successors: {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}} + +.Ltmp87085: + xorl %eax, %eax + retq + +.Ltmp87086: + cmpb $0x0, 0x8(%rdi) + setne %al + retq + +.Ltmp87088: + movb $0x1, %al + retq + +.Ltmp87087: + movzbl 0x14(%rdi), %eax + andb $0x2, %al + shrb %al + retq + + .cfi_endproc +.size foo, .-foo + + .globl bar + .type bar, %function +bar: + .cfi_startproc + ret + .cfi_endproc + .size bar, .-bar + +# Jump tables +.section .rodata + .global jump_table +jump_table: +"JUMP_TABLE/foo.0": + .quad bar + .quad .Ltmp87085 + .quad bar + .quad .Ltmp87086 + .quad .Ltmp87087 + .quad .LBB00 + .quad .Ltmp87088 + .quad bar + .quad .LBB00 + +# CHECK: Jump table {{.*}} for function foo +# CHECK-NEXT: 0x{{.*}} : bar +# CHECK-NEXT: 0x{{.*}} : +# CHECK-NEXT: 0x{{.*}} : bar +# CHECK-NEXT: 0x{{.*}} : +# CHECK-NEXT: 0x{{.*}} : diff --git a/bolt/test/X86/jump-table-icp.test b/bolt/test/X86/jump-table-icp.test index f147432..a095929 100644 --- a/bolt/test/X86/jump-table-icp.test +++ b/bolt/test/X86/jump-table-icp.test @@ -4,11 +4,7 @@ RUN: link_fdata %p/Inputs/jump_table_icp.s %t.o %t.fdata --nmtool llvm-nm RUN: llvm-strip --strip-unneeded %t.o RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q -## This test has commands that rely on shell capabilities that won't execute -## correctly on Windows e.g. 
subshell execution -REQUIRES: shell - -RUN: (llvm-bolt %t.exe --data %t.fdata -o %t --relocs \ +RUN: llvm-bolt %t.exe --data %t.fdata -o %t --relocs \ RUN: --reorder-blocks=cache --split-functions --split-all-cold \ RUN: --use-gnu-stack --dyno-stats --indirect-call-promotion=jump-tables \ RUN: --print-icp -v=0 \ @@ -16,8 +12,8 @@ RUN: --enable-bat --print-cache-metrics \ RUN: --icp-jt-remaining-percent-threshold=10 \ RUN: --icp-jt-total-percent-threshold=2 \ RUN: --indirect-call-promotion-topn=1 \ -RUN: --icp-jump-tables-targets --align-functions-max-bytes=7 2>&1 && \ -RUN: llvm-objdump -d %t --print-imm-hex) | FileCheck %s +RUN: --icp-jump-tables-targets --align-functions-max-bytes=7 | FileCheck %s +RUN: llvm-objdump -d %t --print-imm-hex | FileCheck --check-prefix CHECK-ASM %s BOLT-INFO: ICP total indirect callsites = 0 BOLT-INFO: ICP total jump table callsites = 2 @@ -107,14 +103,14 @@ CHECK-NEXT: Exec Count : 140 CHECK: Predecessors: .Ltmp{{.*}}, .LFT{{.*}} CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 98) -CHECK: <_Z3inci>: -CHECK: movq 0x{{.*}}(,%rax,8), %rax -CHECK-NEXT: cmpq $0x{{.*}}, %rax -CHECK-NEXT: je {{.*}} <_Z3inci+0x{{.*}}> -CHECK-NEXT: jmpq *%rax - -CHECK: <_Z7inc_dupi>: -CHECK: movq 0x{{.*}}(,%rax,8), %rax -CHECK-NEXT: cmpq $0x{{.*}}, %rax -CHECK-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}> -CHECK-NEXT: jmpq *%rax +CHECK-ASM: <_Z3inci>: +CHECK-ASM: movq 0x{{.*}}(,%rax,8), %rax +CHECK-ASM-NEXT: cmpq $0x{{.*}}, %rax +CHECK-ASM-NEXT: je {{.*}} <_Z3inci+0x{{.*}}> +CHECK-ASM-NEXT: jmpq *%rax + +CHECK-ASM: <_Z7inc_dupi>: +CHECK-ASM: movq 0x{{.*}}(,%rax,8), %rax +CHECK-ASM-NEXT: cmpq $0x{{.*}}, %rax +CHECK-ASM-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}> +CHECK-ASM-NEXT: jmpq *%rax diff --git a/bolt/test/X86/shrinkwrapping.test b/bolt/test/X86/shrinkwrapping.test index 521b456..5470b5d 100644 --- a/bolt/test/X86/shrinkwrapping.test +++ b/bolt/test/X86/shrinkwrapping.test @@ -2,23 +2,21 @@ ## shrink-wrapping when optimizing a function without ## frame pointers. 
-## This test has commands that rely on shell capabilities that won't execute -## correctly on Windows e.g. subshell execution to capture command output. -REQUIRES: shell - RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \ RUN: --print-only=main --print-cfg \ RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \ RUN: FileCheck %s --check-prefix=CHECK-BOLT -RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -e \ -RUN: `llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \ -RUN: cut -f1 -d' ' | tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT +RUN: llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \ +RUN: cut -f1 -d' ' | tail -c9 > %t.input_address +RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -f %t.input_address \ +RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT -RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \ -RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \ -RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT +RUN: llvm-nm --numeric-sort %t | grep main | tail -n 1 | \ +RUN: cut -f1 -d' ' | tail -c9 > %t.output_address +RUN: llvm-objdump --dwarf=frames %t | grep -A20 -f %t.output_address \ +RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT CHECK-BOLT: Extern Entry Count: 100 CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s index 3f982dd..74f2761 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-checks.s @@ -31,7 +31,7 @@ resign_xpaci_good: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -46,7 +46,7 @@ resign_xpacd_good: xpacd x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc473 1: pacda x0, x2 ret @@ -117,7 +117,7 @@ 
resign_xpaci_unrelated_auth_and_check: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x10, x2 ret @@ -139,7 +139,7 @@ resign_xpaci_wrong_pattern_1: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -157,7 +157,7 @@ resign_xpaci_wrong_pattern_2: xpaci x0 // x0 instead of x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -174,7 +174,7 @@ resign_xpaci_wrong_pattern_3: xpaci x16 cmp x16, x16 // x16 instead of x0 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -191,7 +191,7 @@ resign_xpaci_wrong_pattern_4: xpaci x16 cmp x0, x0 // x0 instead of x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -208,7 +208,7 @@ resign_xpaci_wrong_pattern_5: mov x16, x16 // replace xpaci with a no-op instruction cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -228,7 +228,7 @@ resign_xpaclri_good: xpaclri cmp x30, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x30, x2 @@ -246,7 +246,7 @@ xpaclri_check_keeps_lr_safe: xpaclri // clobbers LR cmp x30, x16 b.eq 1f - brk 0x1234 // marks LR as trusted and safe-to-dereference + brk 0xc471 // marks LR as trusted and safe-to-dereference 1: ret // not reporting non-protected return .size xpaclri_check_keeps_lr_safe, .-xpaclri_check_keeps_lr_safe @@ -265,7 +265,7 @@ xpaclri_check_requires_safe_lr: xpaclri cmp x30, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: ret .size xpaclri_check_requires_safe_lr, .-xpaclri_check_requires_safe_lr @@ -283,7 +283,7 @@ resign_xpaclri_wrong_reg: xpaclri // ... 
but xpaclri still operates on x30 cmp x20, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x20, x2 @@ -303,7 +303,7 @@ resign_checked_not_authenticated: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -323,7 +323,7 @@ resign_checked_before_authenticated: xpaci x16 cmp x0, x16 b.eq 1f - brk 0x1234 + brk 0xc471 1: autib x0, x1 pacia x0, x2 @@ -339,7 +339,7 @@ resign_high_bits_tbz_good: autib x0, x1 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -378,7 +378,7 @@ resign_high_bits_tbz_wrong_bit: autib x0, x1 eor x16, x0, x0, lsl #1 tbz x16, #63, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -393,7 +393,7 @@ resign_high_bits_tbz_wrong_shift_amount: autib x0, x1 eor x16, x0, x0, lsl #2 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -408,7 +408,7 @@ resign_high_bits_tbz_wrong_shift_type: autib x0, x1 eor x16, x0, x0, lsr #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -423,7 +423,7 @@ resign_high_bits_tbz_wrong_pattern_1: autib x0, x1 eor x16, x0, x0, lsl #1 tbz x17, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -438,7 +438,7 @@ resign_high_bits_tbz_wrong_pattern_2: autib x0, x1 eor x16, x10, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -453,7 +453,7 @@ resign_high_bits_tbz_wrong_pattern_3: autib x0, x1 eor x16, x0, x10, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc471 1: pacia x0, x2 ret @@ -648,7 +648,7 @@ many_checked_regs: xpacd x16 // ... cmp x2, x16 // ... 
b.eq 2f // end of basic block - brk 0x1234 + brk 0xc473 2: pacdza x0 pacdza x1 diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s index c314bc7..f44ba21 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s @@ -79,7 +79,7 @@ good_explicit_check: autia x0, x1 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc470 1: ret .size good_explicit_check, .-good_explicit_check @@ -373,7 +373,7 @@ good_explicit_check_multi_bb: 1: eor x16, x0, x0, lsl #1 tbz x16, #62, 2f - brk 0x1234 + brk 0xc470 2: cbz x1, 3f nop @@ -685,8 +685,7 @@ good_address_arith_nocfg: .globl good_explicit_check_unrelated_reg .type good_explicit_check_unrelated_reg,@function good_explicit_check_unrelated_reg: -// CHECK-LABEL: GS-PAUTH: authentication oracle found in function good_explicit_check_unrelated_reg, basic block {{[^,]+}}, at address - // FIXME: The below instruction is not an authentication oracle +// CHECK-NOT: good_explicit_check_unrelated_reg autia x2, x3 // One of possible execution paths after this instruction // ends at BRK below, thus BRK used as a trap instruction // should formally "check everything" not to introduce @@ -694,7 +693,7 @@ good_explicit_check_unrelated_reg: autia x0, x1 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc470 1: ldr x4, [x2] // Right before this instruction X2 is checked - this // should be propagated to the basic block ending with diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s index 3a4d383..4d4bb7b 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s @@ -57,7 +57,7 @@ good_sign_auted_checked_brk: autda x0, x2 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 
0x1234 + brk 0xc472 1: pacda x0, x1 ret @@ -351,7 +351,7 @@ good_sign_auted_checked_brk_multi_bb: 1: eor x16, x0, x0, lsl #1 tbz x16, #62, 2f - brk 0x1234 + brk 0xc472 2: cbz x4, 3f nop @@ -705,7 +705,7 @@ good_resign_with_increment_brk: add x0, x0, #8 eor x16, x0, x0, lsl #1 tbz x16, #62, 1f - brk 0x1234 + brk 0xc472 1: mov x2, x0 pacda x2, x1 diff --git a/bolt/test/binary-analysis/AArch64/trap-instructions.s b/bolt/test/binary-analysis/AArch64/trap-instructions.s new file mode 100644 index 0000000..7810b2d --- /dev/null +++ b/bolt/test/binary-analysis/AArch64/trap-instructions.s @@ -0,0 +1,213 @@ +// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe -Wl,--emit-relocs +// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s + +// Test what instructions can be used to terminate the program abnormally +// on security violation. +// +// All test cases have the same structure: +// +// cbz x0, 1f // [a], ensures [c] is never reported as unreachable +// autia x2, x3 +// cbz x1, 2f // [b] +// [instruction under test] +// 1: +// ret // [c] +// 2: +// ldr x0, [x2] +// ret +// +// This is to handle three possible cases: the instruction under test may be +// considered by BOLT as +// * trapping (and thus no-return): after being authenticated, x2 is either +// checked by LDR (if [b] is taken) or the program is terminated +// immediately without leaking x2 (if [b] falls through to the trapping +// instruction under test). Nothing is reported. +// * non-trapping, but no-return (such as calling abort()): x2 is leaked if [b] +// falls through. Authentication oracle is reported. +// * non-trapping and falling-through (i.e. a regular instruction): +// x2 is leaked by [c]. Authentication oracle is reported. 
+ + .text + + .globl brk_key_ia + .type brk_key_ia,@function +brk_key_ia: +// CHECK-NOT: brk_key_ia + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xc470 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_key_ia, .-brk_key_ia + + .globl brk_key_ib + .type brk_key_ib,@function +brk_key_ib: +// CHECK-NOT: brk_key_ib + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xc471 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_key_ib, .-brk_key_ib + + .globl brk_key_da + .type brk_key_da,@function +brk_key_da: +// CHECK-NOT: brk_key_da + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xc472 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_key_da, .-brk_key_da + + .globl brk_key_db + .type brk_key_db,@function +brk_key_db: +// CHECK-NOT: brk_key_db + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xc473 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_key_db, .-brk_key_db + +// The immediate operand of BRK instruction may indicate whether the instruction +// is intended to be a non-recoverable trap: for example, for this code +// +// int test_trap(void) { +// __builtin_trap(); +// return 42; +// } +// int test_debugtrap(void) { +// __builtin_debugtrap(); +// return 42; +// } +// +// Clang produces the following assembly: +// +// test_trap: +// brk #0x1 +// test_debugtrap: +// brk #0xf000 +// mov w0, #42 +// ret +// +// In GCC, __builtin_trap() uses "brk 0x3e8" (i.e. decimal 1000) and +// __builtin_debugtrap() is not supported. +// +// At the time of writing these test cases, any BRK instruction is considered +// no-return by BOLT, thus it ends its basic block and prevents falling through +// to the next BB. +// FIXME: Make BOLT handle __builtin_debugtrap() properly from the CFG point +// of view. 
+ + .globl brk_gcc_builtin_trap + .type brk_gcc_builtin_trap,@function +brk_gcc_builtin_trap: +// CHECK-NOT: brk_gcc_builtin_trap + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0x3e8 // __builtin_trap() +1: + ret +2: + ldr x0, [x2] + ret + .size brk_gcc_builtin_trap, .-brk_gcc_builtin_trap + + .globl brk_clang_builtin_trap + .type brk_clang_builtin_trap,@function +brk_clang_builtin_trap: +// CHECK-NOT: brk_clang_builtin_trap + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0x1 // __builtin_trap() +1: + ret +2: + ldr x0, [x2] + ret + .size brk_clang_builtin_trap, .-brk_clang_builtin_trap + + .globl brk_clang_builtin_debugtrap + .type brk_clang_builtin_debugtrap,@function +brk_clang_builtin_debugtrap: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function brk_clang_builtin_debugtrap, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x2, x3 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0xf000 // __builtin_debugtrap() +1: + ret +2: + ldr x0, [x2] + ret + .size brk_clang_builtin_debugtrap, .-brk_clang_builtin_debugtrap + +// Conservatively assume BRK with an unknown immediate operand as not suitable +// for terminating the program on security violation. + .globl brk_unknown_imm + .type brk_unknown_imm,@function +brk_unknown_imm: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function brk_unknown_imm, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x2, x3 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + brk 0x3572 +1: + ret +2: + ldr x0, [x2] + ret + .size brk_unknown_imm, .-brk_unknown_imm + +// Conservatively assume calling the abort() function may be an unsafe way to +// terminate the program, as there is some amount of instructions that would +// be executed when the program state is already tampered with. 
+ .globl call_abort_fn + .type call_abort_fn,@function +call_abort_fn: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function call_abort_fn, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x2, x3 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + cbz x0, 1f + autia x2, x3 + cbz x1, 2f + b abort // a no-return tail call to abort() +1: + ret +2: + ldr x0, [x2] + ret + .size call_abort_fn, .-call_abort_fn + + .globl main + .type main,@function +main: + mov x0, 0 + ret + .size main, .-main diff --git a/bolt/test/dump-dot-func.test b/bolt/test/dump-dot-func.test new file mode 100644 index 0000000..f05bfc1 --- /dev/null +++ b/bolt/test/dump-dot-func.test @@ -0,0 +1,52 @@ +# Test the --dump-dot-func option with multiple functions +# (includes tests for both mangled/unmangled names) + +RUN: %clangxx %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q + +# Test 1: --dump-dot-func with specific function name (mangled) +RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=_Z3addii -v=1 2>&1 | FileCheck %s --check-prefix=ADD + +# Test 2: --dump-dot-func with regex pattern (main.*) +RUN: llvm-bolt %t.exe -o %t.bolt2 --dump-dot-func="main.*" -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-REGEX + +# Test 3: --dump-dot-func with multiple specific functions (mangled names) +RUN: llvm-bolt %t.exe -o %t.bolt3 --dump-dot-func=_Z3addii,_Z8multiplyii -v=1 2>&1 | FileCheck %s --check-prefix=MULTI + +# Test 4: No option specified should create no dot files +RUN: llvm-bolt %t.exe -o %t.bolt4 2>&1 | FileCheck %s --check-prefix=NONE + +# Test 5: --dump-dot-func with non-existent function +RUN: llvm-bolt %t.exe -o %t.bolt5 --dump-dot-func=nonexistent -v=1 2>&1 | FileCheck %s --check-prefix=NONEXISTENT + +# Test 6: Backward compatibility - --dump-dot-all should still work +RUN: llvm-bolt %t.exe -o %t.bolt6 --dump-dot-all -v=1 2>&1 | FileCheck %s --check-prefix=ALL + +# Test 7: Test with unmangled function name (main function) +RUN: 
llvm-bolt %t.exe -o %t.bolt7 --dump-dot-func=main -v=1 2>&1 | FileCheck %s --check-prefix=MAIN-UNMANGLED + +# Check that specific functions are dumped +ADD: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot +ADD-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot +ADD-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot +ADD-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot + +MAIN-REGEX-DAG: BOLT-INFO: dumping CFG to main-00_build-cfg.dot +MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot +MAIN-REGEX-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot + +MULTI-DAG: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot +MULTI-DAG: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot +MULTI-NOT: BOLT-INFO: dumping CFG to main-00_build-cfg.dot +MULTI-NOT: BOLT-INFO: dumping CFG to _Z11main_helperv-00_build-cfg.dot + +# Should be no dumping messages when no option is specified +NONE-NOT: BOLT-INFO: dumping CFG + +# Should be no dumping messages for non-existent function +NONEXISTENT-NOT: BOLT-INFO: dumping CFG + +ALL: BOLT-INFO: dumping CFG to main-00_build-cfg.dot + +MAIN-UNMANGLED: BOLT-INFO: dumping CFG to main-00_build-cfg.dot +MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z3addii-00_build-cfg.dot +MAIN-UNMANGLED-NOT: BOLT-INFO: dumping CFG to _Z8multiplyii-00_build-cfg.dot
\ No newline at end of file diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py index 898dce8..42aed64 100755 --- a/bolt/test/link_fdata.py +++ b/bolt/test/link_fdata.py @@ -9,6 +9,7 @@ respective anchor symbols, and prints the resulting file to stdout. import argparse import os +import platform import shutil import subprocess import sys @@ -19,7 +20,11 @@ parser.add_argument("input") parser.add_argument("objfile", help="Object file to extract symbol values from") parser.add_argument("output") parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix") -parser.add_argument("--nmtool", default="nm", help="Path to nm tool") +parser.add_argument( + "--nmtool", + default="llvm-nm" if platform.system() == "Windows" else "nm", + help="Path to nm tool", +) parser.add_argument("--no-lbr", action="store_true") parser.add_argument("--no-redefine", action="store_true") @@ -27,7 +32,7 @@ args = parser.parse_args() # Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated # profile data -prefix_pat = re.compile(f"^# {args.prefix}: (.*)") +prefix_pat = re.compile(f"^(#|//) {args.prefix}: (.*)") # FDATA records: # <is symbol?> <closest elf symbol or DSO name> <relative FROM address> @@ -56,7 +61,7 @@ with open(args.input, "r") as f: prefix_match = prefix_pat.match(line) if not prefix_match: continue - profile_line = prefix_match.group(1) + profile_line = prefix_match.group(2) fdata_match = fdata_pat.match(profile_line) preagg_match = preagg_pat.match(profile_line) nolbr_match = nolbr_pat.match(profile_line) @@ -86,7 +91,10 @@ with open(args.input, "r") as f: exit("ERROR: unexpected input:\n%s" % line) # Read nm output: <symbol value> <symbol type> <symbol name> -is_llvm_nm = os.path.basename(os.path.realpath(shutil.which(args.nmtool))) == "llvm-nm" +# Ignore .exe on Windows host. 
+is_llvm_nm = os.path.basename(os.path.realpath(shutil.which(args.nmtool))).startswith( + "llvm-nm" +) nm_output = subprocess.run( [ args.nmtool, diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index 0d05229..bef570b 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -18,11 +18,22 @@ from lit.llvm.subst import FindTool # name: The name of this test suite. config.name = "BOLT" +# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites. +# See https://github.com/llvm/llvm-project/issues/106636 for more details. +# +# We prefer the lit internal shell which provides a better user experience on failures +# and is faster unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 +# env var. +use_lit_shell = True +lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL") +if lit_shell_env: + use_lit_shell = lit.util.pythonize_bool(lit_shell_env) + # testFormat: The test format to use to interpret tests. # # For now we require '&&' between commands, until they get globally killed and # the test runner updated. -config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) +config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell) # suffixes: A list of file extensions to treat as test files. 
config.suffixes = [ diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test index 434d4d2..08b3413 100644 --- a/bolt/test/perf2bolt/perf_test.test +++ b/bolt/test/perf2bolt/perf_test.test @@ -2,7 +2,7 @@ REQUIRES: system-linux, perf -RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t +RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -pie -Wl,--script=%S/Inputs/perf_test.lds -o %t RUN: perf record -Fmax -e cycles:u -o %t2 -- %t RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --show-density \ RUN: --heatmap %t.hm 2>&1 | FileCheck %s diff --git a/bolt/test/permission.test b/bolt/test/permission.test index f495e87..ecb51fc 100644 --- a/bolt/test/permission.test +++ b/bolt/test/permission.test @@ -1,13 +1,28 @@ # Ensure that the permissions of the optimized binary file comply with the # system's umask. -# This test performs a logical AND operation on the results of the `stat -c %a -# %t.bolt` and `umask` commands (both results are displayed in octal), and -# checks whether the result is equal to 0. -REQUIRES: shell, system-linux +# This test uses umask, which is Linux specific. 
+REQUIRES: system-linux -RUN: %clang %cflags %p/Inputs/hello.c -o %t -Wl,-q -RUN: llvm-bolt %t -o %t.bolt -RUN: echo $(( 8#$(stat -c %a %t.bolt) & 8#$(umask) )) | FileCheck %s +# RUN: rm -f %t +# RUN: touch %t +# RUN: chmod 0755 %t +# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0755 +# RUN: chmod 0600 %t +# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0600 +# RUN: chmod 0655 %t +# RUN: ls -l %t | cut -f 1 -d ' ' > %t.0655 -CHECK: 0 +RUN: %clang %cflags %p/Inputs/hello.c -o %t.exe -Wl,-q + +RUN: umask 0022 +RUN: llvm-bolt %t.exe -o %t1 +RUN: ls -l %t1 | cut -f 1 -d ' ' | cmp - %t.0755 + +RUN: umask 0177 +RUN: llvm-bolt %t.exe -o %t2 +RUN: ls -l %t2 | cut -f 1 -d ' ' | cmp - %t.0600 + +RUN: umask 0122 +RUN: llvm-bolt %t.exe -o %t3 +RUN: ls -l %t3 | cut -f 1 -d ' ' | cmp - %t.0655 diff --git a/bolt/test/runtime/X86/tail-duplication-constant-prop.s b/bolt/test/runtime/X86/tail-duplication-constant-prop.s index 863c6ff..c28c2f4 100644 --- a/bolt/test/runtime/X86/tail-duplication-constant-prop.s +++ b/bolt/test/runtime/X86/tail-duplication-constant-prop.s @@ -8,8 +8,8 @@ # RUN: --print-finalized \ # RUN: --tail-duplication=moderate --tail-duplication-minimum-offset=1 \ # RUN: --tail-duplication-const-copy-propagation=1 -o %t.out | FileCheck %s -# RUN: %t.exe; echo $? -# RUN: %t.out; echo $? +# RUN: not %t.exe +# RUN: not %t.out # FDATA: 1 main 14 1 main #.BB2# 0 10 # FDATA: 1 main 16 1 main #.BB2# 0 20 diff --git a/bolt/test/runtime/copy_file.py b/bolt/test/runtime/copy_file.py new file mode 100644 index 0000000..14db9d0 --- /dev/null +++ b/bolt/test/runtime/copy_file.py @@ -0,0 +1,15 @@ +import sys +import shutil + +with open(sys.argv[1] + ".output") as log_file: + lines = log_file.readlines() + for line in lines: + if line.startswith(sys.argv[2]): + pid = line.split(" ")[1].strip() + shutil.copy( + sys.argv[1] + "." + pid + ".fdata", + sys.argv[1] + "." 
+ sys.argv[3] + ".fdata", + ) + sys.exit(0) + +sys.exit(1) diff --git a/bolt/test/runtime/instrumentation-indirect-2.c b/bolt/test/runtime/instrumentation-indirect-2.c index 7d19db1..4883d9b 100644 --- a/bolt/test/runtime/instrumentation-indirect-2.c +++ b/bolt/test/runtime/instrumentation-indirect-2.c @@ -50,7 +50,7 @@ int main() { return 0; } /* -REQUIRES: system-linux,shell,fuser +REQUIRES: system-linux,fuser RUN: %clang %cflags %s -o %t.exe -Wl,-q -pie -fpie @@ -61,10 +61,14 @@ RUN: --instrumentation-wait-forks # Instrumented program needs to finish returning zero # Both output and profile must contain all 16 functions -RUN: %t.instrumented_conservative > %t.output -# Wait for profile and output to be fully written -RUN: bash %S/wait_file.sh %t.output -RUN: bash %S/wait_file.sh %t.fdata +# We need to use bash to invoke this as otherwise we hang inside a +# popen.communicate call in lit's internal shell. Eventually we should not +# need this. +# TODO(boomanaiden154): Remove once +# https://github.com/llvm/llvm-project/issues/156484 is fixed. +RUN: bash -c "%t.instrumented_conservative; wait" > %t.output +# We can just read because we ensure the profile will be fully written by +# calling wait inside the bash invocation. 
RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT RUN: cat %t.fdata | FileCheck %s --check-prefix=CHECK-COMMON-PROF @@ -112,14 +116,8 @@ RUN: bash %S/wait_file.sh %t.output # Make sure all functions were called RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT -RUN: child_pid=$(cat %t.output | grep funcA | awk '{print $2;}') -RUN: par_pid=$(cat %t.output | grep funcB | awk '{print $2;}') - -RUN: bash %S/wait_file.sh %t.$child_pid.fdata -RUN: bash %S/wait_file.sh %t.$par_pid.fdata - -RUN: mv %t.$child_pid.fdata %t.child.fdata -RUN: mv %t.$par_pid.fdata %t.parent.fdata +RUN: %python %S/copy_file.py %t funcA child +RUN: %python %S/copy_file.py %t funcB parent # Instrumented binary must produce two profiles with only local calls # recorded. Functions called only in child should not appear in parent's diff --git a/bolt/test/timers.c b/bolt/test/timers.c index a34958a..59bd0d5 100644 --- a/bolt/test/timers.c +++ b/bolt/test/timers.c @@ -1,22 +1,21 @@ -/* This test checks timers for metadata manager phases. -# RUN: %clang %cflags %s -o %t.exe -# RUN: link_fdata %s %t.exe %t.fdata -# RUN: llvm-bolt %t.exe -o %t.null --data %t.fdata -w %t.yaml --time-rewrite \ -# RUN: 2>&1 | FileCheck %s -# RUN: link_fdata %s %t.exe %t.preagg PREAGG -# RUN: perf2bolt %t.exe -o %t.null -p %t.preagg --pa --time-rewrite \ -# RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-P2B +// This test checks timers for metadata manager phases. 
+// RUN: %clang %cflags %s -o %t.exe +// RUN: link_fdata %s %t.exe %t.fdata +// RUN: llvm-bolt %t.exe -o %t.null --data %t.fdata -w %t.yaml --time-rewrite \ +// RUN: 2>&1 | FileCheck %s +// RUN: link_fdata %s %t.exe %t.preagg PREAGG +// RUN: perf2bolt %t.exe -o %t.null -p %t.preagg --pa --time-rewrite \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-P2B -# CHECK-DAG: update metadata post-emit -# CHECK-DAG: process section metadata -# CHECK-DAG: process metadata pre-CFG -# CHECK-DAG: process metadata post-CFG -# CHECK-DAG: finalize metadata pre-emit +// CHECK-DAG: update metadata post-emit +// CHECK-DAG: process section metadata +// CHECK-DAG: process metadata pre-CFG +// CHECK-DAG: process metadata post-CFG +// CHECK-DAG: finalize metadata pre-emit -# CHECK-P2B-DAG: process section metadata -# CHECK-P2B-DAG: process metadata pre-CFG +// CHECK-P2B-DAG: process section metadata +// CHECK-P2B-DAG: process metadata pre-CFG -# FDATA: 0 [unknown] 0 1 main 0 1 0 -# PREAGG: B X:0 #main# 1 0 -*/ +// FDATA: 0 [unknown] 0 1 main 0 1 0 +// PREAGG: B X:0 #main# 1 0 int main() { return 0; } |