diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- | llvm/test/CodeGen/X86/avg.ll | 74 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/global-variable-partition.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/relptr-rodata.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/setcc-wide-types.ll | 155 |
5 files changed, 239 insertions, 36 deletions
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll index 0de308a..5152c005 100644 --- a/llvm/test/CodeGen/X86/avg.ll +++ b/llvm/test/CodeGen/X86/avg.ll @@ -728,45 +728,70 @@ define void @avg_v32i8_2(ptr %a, ptr %b) nounwind { define void @avg_v64i8_2(ptr %a, ptr %b) nounwind { ; SSE2-LABEL: avg_v64i8_2: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps (%rsi), %xmm0 -; SSE2-NEXT: movaps 16(%rsi), %xmm1 -; SSE2-NEXT: movaps 32(%rsi), %xmm2 -; SSE2-NEXT: movaps 48(%rsi), %xmm3 -; SSE2-NEXT: movups %xmm3, (%rax) -; SSE2-NEXT: movups %xmm2, (%rax) -; SSE2-NEXT: movups %xmm1, (%rax) -; SSE2-NEXT: movups %xmm0, (%rax) +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: movdqa 16(%rdi), %xmm1 +; SSE2-NEXT: movdqa 32(%rdi), %xmm2 +; SSE2-NEXT: movdqa 48(%rdi), %xmm3 +; SSE2-NEXT: pavgb (%rsi), %xmm0 +; SSE2-NEXT: pavgb 16(%rsi), %xmm1 +; SSE2-NEXT: pavgb 32(%rsi), %xmm2 +; SSE2-NEXT: pavgb 48(%rsi), %xmm3 +; SSE2-NEXT: movdqu %xmm3, (%rax) +; SSE2-NEXT: movdqu %xmm2, (%rax) +; SSE2-NEXT: movdqu %xmm1, (%rax) +; SSE2-NEXT: movdqu %xmm0, (%rax) ; SSE2-NEXT: retq ; ; AVX1-LABEL: avg_v64i8_2: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovaps (%rsi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rsi), %ymm1 -; AVX1-NEXT: vmovups %ymm1, (%rax) -; AVX1-NEXT: vmovups %ymm0, (%rax) -; AVX1-NEXT: vzeroupper +; AVX1-NEXT: vmovdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 +; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 +; AVX1-NEXT: vpavgb (%rsi), %xmm0, %xmm0 +; AVX1-NEXT: vpavgb 16(%rsi), %xmm1, %xmm1 +; AVX1-NEXT: vpavgb 32(%rsi), %xmm2, %xmm2 +; AVX1-NEXT: vpavgb 48(%rsi), %xmm3, %xmm3 +; AVX1-NEXT: vmovdqu %xmm3, (%rax) +; AVX1-NEXT: vmovdqu %xmm2, (%rax) +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) ; AVX1-NEXT: retq ; ; AVX2-LABEL: avg_v64i8_2: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovaps (%rsi), %ymm0 -; AVX2-NEXT: vmovaps 32(%rsi), %ymm1 -; AVX2-NEXT: vmovups %ymm1, (%rax) -; AVX2-NEXT: vmovups %ymm0, (%rax) +; AVX2-NEXT: vmovdqa (%rdi), %ymm0 +; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 +; AVX2-NEXT: vpavgb (%rsi), %ymm0, %ymm0 +; AVX2-NEXT: vpavgb 32(%rsi), %ymm1, %ymm1 +; AVX2-NEXT: vmovdqu %ymm1, (%rax) +; AVX2-NEXT: vmovdqu %ymm0, (%rax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: avg_v64i8_2: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovaps (%rsi), %zmm0 -; AVX512-NEXT: vmovups %zmm0, (%rax) -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: avg_v64i8_2: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 +; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1 +; AVX512F-NEXT: vpavgb (%rsi), %ymm0, %ymm0 +; AVX512F-NEXT: vpavgb 32(%rsi), %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqu %ymm1, (%rax) +; AVX512F-NEXT: vmovdqu %ymm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: avg_v64i8_2: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpavgb (%rsi), %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq %1 = load <64 x i8>, ptr %a %2 = load <64 x i8>, ptr %b %3 = zext <64 x i8> %1 to <64 x i32> %4 = zext <64 x i8> %2 to <64 x i32> - %5 = add nuw nsw <64 x i32> %4, %4 + %5 = add nuw nsw <64 x i32> %3, %4 %6 = add nuw nsw <64 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> %7 = lshr <64 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> %8 = trunc <64 x i32> %7 to <64 x i8> @@ -774,7 +799,6 @@ define void @avg_v64i8_2(ptr %a, ptr %b) nounwind { ret void } - define void @avg_v4i16_2(ptr %a, ptr %b) nounwind { ; SSE2-LABEL: avg_v4i16_2: ; SSE2: # %bb.0: diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll index a0c243b..f3950b7 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll @@ -1,16 +1,15 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -;; A minimal test case. llc will crash if global variables already has a section -;; prefix. Subsequent PRs will expand on this test case to test the hotness -;; reconciliation implementation. - -; RUN: not llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ +;; A minimal test case. Subsequent PRs will expand on this test case +;; (e.g., with more functions, variables and profiles) and test the hotness +;; reconcillation implementation. +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ ; RUN: -partition-static-data-sections=true \ ; RUN: -data-sections=true -unique-section-names=false \ -; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=ERR +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=IR -; ERR: Global variable hot_bss already has a section prefix hot +; IR: .section .bss.hot.,"aw" @hot_bss = internal global i32 0, !section_prefix !17 diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll index ce06d17..604b4fd 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -106,23 +106,31 @@ target triple = "x86_64-unknown-linux-gnu" ; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,8 ; AGG-NEXT: .section .data.unlikely.,"aw",@progbits +;; The `.section` directive is omitted for .data with -unique-section-names=false. +; See MCSectionELF::shouldOmitSectionDirective for the implementation details. + ; For @data_with_unknown_hotness ; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness ; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits ; UNIQ: .section .data,"aw",@progbits,unique,9 -; The `.section` directive is omitted for .data with -unique-section-names=false. -; See MCSectionELF::shouldOmitSectionDirective for the implementation details. + ; AGG: .data ; COMMON: .Ldata_with_unknown_hotness: -; For @hot_data_custom_bar_section -; It has an explicit section attribute 'var' and shouldn't have hot or unlikely suffix. +; For variables that are not eligible for section prefix annotation ; COMMON: .type hot_data_custom_bar_section,@object ; SYM-NEXT: .section bar,"aw",@progbits ; SYM: hot_data_custom_bar_section ; UNIQ: .section bar,"aw",@progbits ; AGG: .section bar,"aw",@progbits +; SYM: .section .data.llvm.fake_var,"aw" +; UNIQ: .section .data,"aw" +; AGG: .data + +;; No section for linker declaration +; COMMON-NOT: qux + @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 @hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3] @@ -137,6 +145,8 @@ target triple = "x86_64-unknown-linux-gnu" @data3 = internal global i32 3 @data_with_unknown_hotness = private global i32 5 @hot_data_custom_bar_section = internal global i32 101 #0 +@llvm.fake_var = internal global i32 123 +@qux = external global i64 define void @cold_func(i32 %0) !prof !15 { %2 = load i32, ptr @cold_bss diff --git a/llvm/test/CodeGen/X86/relptr-rodata.ll b/llvm/test/CodeGen/X86/relptr-rodata.ll index ea22b08..954ea8f 100644 --- a/llvm/test/CodeGen/X86/relptr-rodata.ll +++ b/llvm/test/CodeGen/X86/relptr-rodata.ll @@ -10,16 +10,31 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: .long hidden-rodata @rodata = hidden constant i32 trunc (i64 sub (i64 ptrtoint (ptr @hidden to i64), i64 ptrtoint (ptr @rodata to i64)) to i32) +; CHECK: .section .rodata.rodata_ptrtoaddr +; CHECK: rodata_ptrtoaddr: +; CHECK: .long hidden-rodata_ptrtoaddr +@rodata_ptrtoaddr = hidden constant i32 trunc (i64 sub (i64 ptrtoaddr (ptr @hidden to i64), i64 ptrtoaddr (ptr @rodata_ptrtoaddr to i64)) to i32) + ; CHECK: .section .data.rel.ro.relro1 ; CHECK: relro1: ; CHECK: .long default-relro1 @relro1 = hidden constant i32 trunc (i64 sub (i64 ptrtoint (ptr @default to i64), i64 ptrtoint (ptr @relro1 to i64)) to i32) +; CHECK: .section .data.rel.ro.relro1_ptrtoaddr +; CHECK: relro1_ptrtoaddr: +; CHECK: .long default-relro1_ptrtoaddr +@relro1_ptrtoaddr = hidden constant i32 trunc (i64 sub (i64 ptrtoaddr (ptr @default to i64), i64 ptrtoaddr (ptr @relro1_ptrtoaddr to i64)) to i32) + ; CHECK: .section .data.rel.ro.relro2 ; CHECK: relro2: ; CHECK: .long hidden-relro2 @relro2 = constant i32 trunc (i64 sub (i64 ptrtoint (ptr @hidden to i64), i64 ptrtoint (ptr @relro2 to i64)) to i32) +; CHECK: .section .data.rel.ro.relro2_ptrtoaddr +; CHECK: relro2_ptrtoaddr: +; CHECK: .long hidden-relro2_ptrtoaddr +@relro2_ptrtoaddr = constant i32 trunc (i64 sub (i64 ptrtoaddr (ptr @hidden to i64), i64 ptrtoaddr (ptr @relro2_ptrtoaddr to i64)) to i32) + ; CHECK: .section .rodata.obj ; CHECK-NEXT: .globl obj ; CHECK: obj: diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll index 5aa266d..69abf6e 100644 --- a/llvm/test/CodeGen/X86/setcc-wide-types.ll +++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -1447,3 +1447,158 @@ define i1 @eq_i512_load_arg(ptr%p, i512 %b) { %r = icmp eq i512 %a, %b ret i1 %r } + +; Tests for any/allbits from memory. + +define i1 @anybits_i128_load_arg(ptr %w) { +; ANY-LABEL: anybits_i128_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq (%rdi), %rax +; ANY-NEXT: orq 8(%rdi), %rax +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i128, ptr %w + %cmp = icmp ne i128 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i128_load_arg(ptr %w) { +; SSE2-LABEL: allbits_i128_load_arg: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pcmpeqb (%rdi), %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE41-LABEL: allbits_i128_load_arg: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa (%rdi), %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: ptest %xmm1, %xmm0 +; SSE41-NEXT: setb %al +; SSE41-NEXT: retq +; +; AVXANY-LABEL: allbits_i128_load_arg: +; AVXANY: # %bb.0: +; AVXANY-NEXT: vmovdqa (%rdi), %xmm0 +; AVXANY-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVXANY-NEXT: vptest %xmm1, %xmm0 +; AVXANY-NEXT: setb %al +; AVXANY-NEXT: retq + %ld = load i128, ptr %w + %cmp = icmp eq i128 %ld, -1 + ret i1 %cmp +} + +define i1 @anybits_i256_load_arg(ptr %w) { +; ANY-LABEL: anybits_i256_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq (%rdi), %rax +; ANY-NEXT: movq 8(%rdi), %rcx +; ANY-NEXT: orq 24(%rdi), %rcx +; ANY-NEXT: orq 16(%rdi), %rax +; ANY-NEXT: orq %rcx, %rax +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i256, ptr %w + %cmp = icmp ne i256 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i256_load_arg(ptr %w) { +; SSE-LABEL: allbits_i256_load_arg: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: andq 24(%rdi), %rcx +; SSE-NEXT: andq 16(%rdi), %rax +; SSE-NEXT: andq %rcx, %rax +; SSE-NEXT: cmpq $-1, %rax +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1-LABEL: allbits_i256_load_arg: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vptest %ymm1, %ymm0 +; AVX1-NEXT: setb %al +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: allbits_i256_load_arg: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vptest %ymm1, %ymm0 +; AVX2-NEXT: setb %al +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: allbits_i256_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vptest %ymm1, %ymm0 +; AVX512-NEXT: setb %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i256, ptr %w + %cmp = icmp eq i256 %ld, -1 + ret i1 %cmp +} + +define i1 @anybits_i512_load_arg(ptr %w) { +; ANY-LABEL: anybits_i512_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq 16(%rdi), %rax +; ANY-NEXT: movq (%rdi), %rcx +; ANY-NEXT: movq 8(%rdi), %rdx +; ANY-NEXT: movq 24(%rdi), %rsi +; ANY-NEXT: orq 56(%rdi), %rsi +; ANY-NEXT: orq 40(%rdi), %rdx +; ANY-NEXT: orq %rsi, %rdx +; ANY-NEXT: orq 48(%rdi), %rax +; ANY-NEXT: orq 32(%rdi), %rcx +; ANY-NEXT: orq %rax, %rcx +; ANY-NEXT: orq %rdx, %rcx +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i512, ptr %w + %cmp = icmp ne i512 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i512_load_arg(ptr %w) { +; NO512-LABEL: allbits_i512_load_arg: +; NO512: # %bb.0: +; NO512-NEXT: movq 16(%rdi), %rax +; NO512-NEXT: movq (%rdi), %rcx +; NO512-NEXT: movq 8(%rdi), %rdx +; NO512-NEXT: movq 24(%rdi), %rsi +; NO512-NEXT: andq 56(%rdi), %rsi +; NO512-NEXT: andq 40(%rdi), %rdx +; NO512-NEXT: andq %rsi, %rdx +; NO512-NEXT: andq 48(%rdi), %rax +; NO512-NEXT: andq 32(%rdi), %rcx +; NO512-NEXT: andq %rax, %rcx +; NO512-NEXT: andq %rdx, %rcx +; NO512-NEXT: cmpq $-1, %rcx +; NO512-NEXT: sete %al +; NO512-NEXT: retq +; +; AVX512-LABEL: allbits_i512_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1 +; AVX512-NEXT: vpcmpneqd (%rdi), %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: sete %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i512, ptr %w + %cmp = icmp eq i512 %ld, -1 + ret i1 %cmp +} |