diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- | llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll | 38 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/call-graph-section-assembly.ll | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/call-graph-section-tailcall.ll | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/call-graph-section.ll | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr162812.ll | 98 |
6 files changed, 214 insertions, 34 deletions
diff --git a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll new file mode 100644 index 0000000..2aea9c1 --- /dev/null +++ b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll @@ -0,0 +1,38 @@ +;; Test if a potential indirect call target function which has internal linkage and +;; address taken has its type ID emitted to callgraph section. +;; This test also makes sure that callback functions which meet the above constraint +;; are handled correctly. + +; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -o - < %s | FileCheck %s + +declare !type !0 void @_Z6doWorkPFviE(ptr) + +define i32 @_Z4testv() !type !1 { +entry: + call void @_Z6doWorkPFviE(ptr nonnull @_ZL10myCallbacki) + ret i32 0 +} + +; CHECK: _ZL10myCallbacki: +; CHECK-NEXT: [[LABEL_FUNC:\.Lfunc_begin[0-9]+]]: +define internal void @_ZL10myCallbacki(i32 %value) !type !2 { +entry: + %sink = alloca i32, align 4 + store volatile i32 %value, ptr %sink, align 4 + %i1 = load volatile i32, ptr %sink, align 4 + ret void +} + +!0 = !{i64 0, !"_ZTSFvPFviEE.generalized"} +!1 = !{i64 0, !"_ZTSFivE.generalized"} +!2 = !{i64 0, !"_ZTSFviE.generalized"} + +; CHECK: .section .callgraph,"o",@progbits,.text +;; Version +; CHECK-NEXT: .byte 0 +;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0. 
+; CHECK-NEXT: .byte 1 +;; Function Entry PC +; CHECK-NEXT: .quad [[LABEL_FUNC]] +;; Function type ID +; CHECK-NEXT: .quad -5212364466660467813 diff --git a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll index f0dbc31..1aabf66 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll @@ -15,16 +15,13 @@ declare !type !2 ptr @direct_baz(ptr) define ptr @ball() { entry: call void @direct_foo() - %fp_foo_val = load ptr, ptr null, align 8 - ; CHECK: [[LABEL_TMP0:\.L.*]]: + %fp_foo_val = load ptr, ptr null, align 8 call void (...) %fp_foo_val(), !callee_type !0 call void @direct_foo() - %fp_bar_val = load ptr, ptr null, align 8 - ; CHECK: [[LABEL_TMP1:\.L.*]]: + %fp_bar_val = load ptr, ptr null, align 8 %call_fp_bar = call i32 %fp_bar_val(i8 0), !callee_type !2 %call_fp_bar_direct = call i32 @direct_bar(i8 1) %fp_baz_val = load ptr, ptr null, align 8 - ; CHECK: [[LABEL_TMP2:\.L.*]]: %call_fp_baz = call ptr %fp_baz_val(ptr null), !callee_type !4 call void @direct_foo() %call_fp_baz_direct = call ptr @direct_baz(ptr null) @@ -32,29 +29,31 @@ entry: ret ptr %call_fp_baz } -; CHECK: .section .callgraph,"o",@progbits,.text - -; CHECK-NEXT: .quad 0 -; CHECK-NEXT: .quad [[LABEL_FUNC]] -; CHECK-NEXT: .quad 1 -; CHECK-NEXT: .quad 3 !0 = !{!1} !1 = !{i64 0, !"_ZTSFvE.generalized"} -;; Test for MD5 hash of _ZTSFvE.generalized and the generated temporary callsite label. -; CHECK-NEXT: .quad 4524972987496481828 -; CHECK-NEXT: .quad [[LABEL_TMP0]] !2 = !{!3} !3 = !{i64 0, !"_ZTSFicE.generalized"} -;; Test for MD5 hash of _ZTSFicE.generalized and the generated temporary callsite label. -; CHECK-NEXT: .quad 3498816979441845844 -; CHECK-NEXT: .quad [[LABEL_TMP1]] !4 = !{!5} !5 = !{i64 0, !"_ZTSFPvS_E.generalized"} -;; Test for MD5 hash of _ZTSFPvS_E.generalized and the generated temporary callsite label. 
-; CHECK-NEXT: .quad 8646233951371320954 -; CHECK-NEXT: .quad [[LABEL_TMP2]] -;; Test for number of direct calls and {callsite_label, callee} pairs. -; CHECK-NEXT: .quad 3 + +; CHECK: .section .callgraph,"o",@progbits,.text +;; Version +; CHECK-NEXT: .byte 0 +;; Flags +; CHECK-NEXT: .byte 7 +;; Function Entry PC +; CHECK-NEXT: .quad [[LABEL_FUNC]] +;; Function type ID -- set to 0 as no type metadata attached to function. +; CHECK-NEXT: .quad 0 +;; Number of unique direct callees. +; CHECK-NEXT: .byte 3 +;; Direct callees. ; CHECK-NEXT: .quad direct_foo ; CHECK-NEXT: .quad direct_bar ; CHECK-NEXT: .quad direct_baz +;; Number of unique indirect target type IDs. +; CHECK-NEXT: .byte 3 +;; Indirect type IDs. +; CHECK-NEXT: .quad 4524972987496481828 +; CHECK-NEXT: .quad 3498816979441845844 +; CHECK-NEXT: .quad 8646233951371320954 diff --git a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll index fa14a98..34dc5b8 100644 --- a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll +++ b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll @@ -22,13 +22,14 @@ declare !type !2 i32 @foo(i8 signext) declare !type !2 i32 @bar(i8 signext) -;; Check that the numeric type id (md5 hash) for the below type ids are emitted -;; to the callgraph section. - -; CHECK: Hex dump of section '.callgraph': - !0 = !{i64 0, !"_ZTSFiPvcE.generalized"} !1 = !{!2} -; CHECK-DAG: 5486bc59 814b8e30 !2 = !{i64 0, !"_ZTSFicE.generalized"} !3 = !{i64 0, !"_ZTSFiiE.generalized"} + +; CHECK: Hex dump of section '.callgraph': +; CHECK-NEXT: 0x00000000 00050000 00000000 00008e19 0b7f3326 +; CHECK-NEXT: 0x00000010 e3000154 86bc5981 4b8e3000 05000000 +;; Verify that the type id 0x308e4b8159bc8654 is in section. 
+; CHECK-NEXT: 0x00000020 00000000 00a150b8 3e0cfe3c b2015486 +; CHECK-NEXT: 0x00000030 bc59814b 8e30 diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll index 66d009c..c144a24 100644 --- a/llvm/test/CodeGen/X86/call-graph-section.ll +++ b/llvm/test/CodeGen/X86/call-graph-section.ll @@ -22,15 +22,16 @@ entry: ;; Check that the numeric type id (md5 hash) for the below type ids are emitted ;; to the callgraph section. - -; CHECK: Hex dump of section '.callgraph': - -; CHECK-DAG: 2444f731 f5eecb3e !0 = !{i64 0, !"_ZTSFvE.generalized"} !1 = !{!0} -; CHECK-DAG: 5486bc59 814b8e30 !2 = !{i64 0, !"_ZTSFicE.generalized"} !3 = !{!2} -; CHECK-DAG: 7ade6814 f897fd77 !4 = !{!5} !5 = !{i64 0, !"_ZTSFPvS_E.generalized"} + +;; Make sure the following type IDs (stored little-endian) are in the call graph section: +;; 0x3ecbeef531f74424, 0x308e4b8159bc8654, 0x77fd97f81468de7a +; CHECK: Hex dump of section '.callgraph': +; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000000 +; CHECK-NEXT: 0x00000010 00000324 44f731f5 eecb3e54 86bc5981 +; CHECK-NEXT: 0x00000020 4b8e307a de6814f8 97fd77 diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll new file mode 100644 index 0000000..a0c243b --- /dev/null +++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll @@ -0,0 +1,43 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;; A minimal test case. llc fails with an error if a global variable already has a +;; section prefix. Subsequent PRs will expand on this test case to test the hotness +;; reconciliation implementation. 
+ +; RUN: not llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ +; RUN: -partition-static-data-sections=true \ +; RUN: -data-sections=true -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=ERR + +; ERR: Global variable hot_bss already has a section prefix hot + +@hot_bss = internal global i32 0, !section_prefix !17 + +define void @hot_func() !prof !14 { + %9 = load i32, ptr @hot_bss + %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9) + ret void +} + +declare i32 @func_taking_arbitrary_param(...) + +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460183} +!5 = !{!"MaxCount", i64 849024} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849024} +!8 = !{!"NumCounts", i64 23627} +!9 = !{!"NumFunctions", i64 3271} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13} +!12 = !{i32 990000, i64 166, i32 73} +!13 = !{i32 999999, i64 3, i32 1443} +!14 = !{!"function_entry_count", i64 100000} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 1, i32 99999} +!17 = !{!"section_prefix", !"hot"} diff --git a/llvm/test/CodeGen/X86/pr162812.ll b/llvm/test/CodeGen/X86/pr162812.ll new file mode 100644 index 0000000..4ea3101 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr162812.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE2 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE42 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512 + +define <32 x i8> @PR162812(<32 x i8> %a, <32 x i8> %mask) { +; SSE2-LABEL: PR162812: +; SSE2: # %bb.0: +; SSE2-NEXT: psrlw $2, 
%xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [8224,8224,8224,8224,8224,8224,8224,8224] +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: paddb %xmm2, %xmm2 +; SSE2-NEXT: paddb %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm5, %xmm5 +; SSE2-NEXT: pxor %xmm6, %xmm6 +; SSE2-NEXT: pcmpgtb %xmm2, %xmm6 +; SSE2-NEXT: movdqa %xmm6, %xmm2 +; SSE2-NEXT: pandn %xmm0, %xmm2 +; SSE2-NEXT: paddb %xmm0, %xmm0 +; SSE2-NEXT: pand %xmm6, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: psrlw $2, %xmm3 +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: paddb %xmm3, %xmm3 +; SSE2-NEXT: paddb %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtb %xmm3, %xmm5 +; SSE2-NEXT: movdqa %xmm5, %xmm2 +; SSE2-NEXT: pandn %xmm1, %xmm2 +; SSE2-NEXT: paddb %xmm1, %xmm1 +; SSE2-NEXT: pand %xmm5, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: retq +; +; SSE42-LABEL: PR162812: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa %xmm2, %xmm5 +; SSE42-NEXT: movdqa %xmm0, %xmm2 +; SSE42-NEXT: movdqa %xmm0, %xmm6 +; SSE42-NEXT: psllw $2, %xmm6 +; SSE42-NEXT: movdqa {{.*#+}} xmm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] +; SSE42-NEXT: pand %xmm7, %xmm6 +; SSE42-NEXT: psrlw $2, %xmm5 +; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [8224,8224,8224,8224,8224,8224,8224,8224] +; SSE42-NEXT: pand %xmm4, %xmm5 +; SSE42-NEXT: paddb %xmm5, %xmm5 +; SSE42-NEXT: movdqa %xmm5, %xmm0 +; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm2 +; SSE42-NEXT: movdqa %xmm2, %xmm6 +; SSE42-NEXT: paddb %xmm2, %xmm6 +; SSE42-NEXT: paddb %xmm5, %xmm5 +; SSE42-NEXT: movdqa %xmm5, %xmm0 +; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm2 +; SSE42-NEXT: movdqa %xmm1, %xmm5 +; SSE42-NEXT: psllw $2, %xmm5 +; SSE42-NEXT: pand %xmm7, %xmm5 +; SSE42-NEXT: psrlw $2, %xmm3 +; SSE42-NEXT: pand %xmm3, %xmm4 +; SSE42-NEXT: paddb %xmm4, %xmm4 +; SSE42-NEXT: movdqa %xmm4, %xmm0 +; SSE42-NEXT: pblendvb %xmm0, %xmm5, %xmm1 +; SSE42-NEXT: movdqa %xmm1, %xmm3 +; SSE42-NEXT: paddb %xmm1, %xmm3 +; SSE42-NEXT: paddb %xmm4, %xmm4 +; SSE42-NEXT: movdqa %xmm4, %xmm0 +; SSE42-NEXT: pblendvb %xmm0, 
%xmm3, %xmm1 +; SSE42-NEXT: movdqa %xmm2, %xmm0 +; SSE42-NEXT: retq +; +; AVX2-LABEL: PR162812: +; AVX2: # %bb.0: +; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpsrlw $2, %ymm1, %ymm1 +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2 +; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: PR162812: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2 +; AVX512-NEXT: vpsrlw $2, %ymm1, %ymm1 +; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1 +; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2 +; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512-NEXT: retq + %1 = lshr <32 x i8> %mask, splat (i8 7) + %ret = shl <32 x i8> %a, %1 + ret <32 x i8> %ret +} |