diff options
Diffstat (limited to 'llvm/test')
16 files changed, 965 insertions, 129 deletions
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 362586a..4fc506f 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -87,6 +87,11 @@ declare void @llvm.nvvm.barrier(i32, i32) declare void @llvm.nvvm.barrier.sync(i32) declare void @llvm.nvvm.barrier.sync.cnt(i32, i32) +declare float @llvm.nvvm.ex2.approx.f(float) +declare double @llvm.nvvm.ex2.approx.d(double) +declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>) +declare float @llvm.nvvm.ex2.approx.ftz.f(float) + ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -355,3 +360,15 @@ define void @cta_barriers(i32 %x, i32 %y) { call void @llvm.nvvm.barrier.sync.cnt(i32 %x, i32 %y) ret void } + +define void @nvvm_ex2_approx(float %a, double %b, half %c, <2 x half> %d) { +; CHECK: call float @llvm.nvvm.ex2.approx.f32(float %a) +; CHECK: call double @llvm.nvvm.ex2.approx.f64(double %b) +; CHECK: call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %d) +; CHECK: call float @llvm.nvvm.ex2.approx.ftz.f32(float %a) + %r1 = call float @llvm.nvvm.ex2.approx.f(float %a) + %r2 = call double @llvm.nvvm.ex2.approx.d(double %b) + %r3 = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %d) + %r4 = call float @llvm.nvvm.ex2.approx.ftz.f(float %a) + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/f16-ex2.ll b/llvm/test/CodeGen/NVPTX/f16-ex2.ll index ee79f9d..af3fe67 100644 --- a/llvm/test/CodeGen/NVPTX/f16-ex2.ll +++ b/llvm/test/CodeGen/NVPTX/f16-ex2.ll @@ -1,12 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s -; RUN: %if ptxas-sm_75 && ptxas-isa-7.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %} +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-FP16 %s +; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %} target triple = "nvptx64-nvidia-cuda" declare half @llvm.nvvm.ex2.approx.f16(half) -declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>) +declare <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half>) +declare bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat) +declare <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat>) -; CHECK-LABEL: ex2_half define half @ex2_half(half %0) { ; CHECK-FP16-LABEL: ex2_half( ; CHECK-FP16: { @@ -21,7 +22,6 @@ define half @ex2_half(half %0) { ret half %res } -; CHECK-LABEL: ex2_2xhalf define <2 x half> @ex2_2xhalf(<2 x half> %0) { ; CHECK-FP16-LABEL: ex2_2xhalf( ; CHECK-FP16: { @@ -32,6 +32,34 @@ define <2 x half> @ex2_2xhalf(<2 x half> %0) { ; CHECK-FP16-NEXT: ex2.approx.f16x2 %r2, %r1; ; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-FP16-NEXT: ret; - %res = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %0) + %res = call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %0) ret <2 x half> %res } + +define bfloat @ex2_bfloat(bfloat %0) { +; CHECK-FP16-LABEL: ex2_bfloat( +; CHECK-FP16: { +; CHECK-FP16-NEXT: .reg .b16 %rs<3>; +; CHECK-FP16-EMPTY: +; CHECK-FP16-NEXT: // %bb.0: +; CHECK-FP16-NEXT: ld.param.b16 %rs1, [ex2_bfloat_param_0]; +; CHECK-FP16-NEXT: ex2.approx.ftz.bf16 %rs2, %rs1; +; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs2; +; CHECK-FP16-NEXT: ret; + %res = call bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat %0) + ret bfloat %res +} + +define <2 x bfloat> @ex2_2xbfloat(<2 x bfloat> %0) { +; CHECK-FP16-LABEL: ex2_2xbfloat( +; CHECK-FP16: { +; CHECK-FP16-NEXT: .reg .b32 %r<3>; +; CHECK-FP16-EMPTY: +; CHECK-FP16-NEXT: // %bb.0: +; CHECK-FP16-NEXT: ld.param.b32 %r1, [ex2_2xbfloat_param_0]; +; CHECK-FP16-NEXT: ex2.approx.ftz.bf16x2 %r2, %r1; +; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-FP16-NEXT: ret; + %res = call <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat> %0) + ret <2 x bfloat> %res +} diff --git a/llvm/test/CodeGen/NVPTX/f32-ex2.ll b/llvm/test/CodeGen/NVPTX/f32-ex2.ll index 796d80d..97b9d35 100644 --- a/llvm/test/CodeGen/NVPTX/f32-ex2.ll +++ b/llvm/test/CodeGen/NVPTX/f32-ex2.ll @@ -3,7 +3,8 @@ ; RUN: %if ptxas-sm_50 && ptxas-isa-3.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_50 -mattr=+ptx32 | %ptxas-verify -arch=sm_50 %} target triple = "nvptx-nvidia-cuda" -declare float @llvm.nvvm.ex2.approx.f(float) +declare float @llvm.nvvm.ex2.approx.f32(float) +declare float @llvm.nvvm.ex2.approx.ftz.f32(float) ; CHECK-LABEL: ex2_float define float @ex2_float(float %0) { @@ -16,7 +17,7 @@ define float @ex2_float(float %0) { ; CHECK-NEXT: ex2.approx.f32 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; - %res = call float @llvm.nvvm.ex2.approx.f(float %0) + %res = call float @llvm.nvvm.ex2.approx.f32(float %0) ret float %res } @@ -31,6 +32,6 @@ define float @ex2_float_ftz(float %0) { ; CHECK-NEXT: ex2.approx.ftz.f32 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; - %res = call float @llvm.nvvm.ex2.approx.ftz.f(float %0) + %res = call float @llvm.nvvm.ex2.approx.ftz.f32(float %0) ret float %res } diff --git a/llvm/test/CodeGen/X86/isel-llvm.sincos.ll b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll index 065710f..8576f8f 100644 --- a/llvm/test/CodeGen/X86/isel-llvm.sincos.ll +++ b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll @@ -3,6 +3,9 @@ ; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=0 -fast-isel=0 | FileCheck %s --check-prefixes=X86,SDAG-X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=0 -fast-isel=0 | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=MACOS-SINCOS-STRET +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=MACOS-NOSINCOS-STRET + ; TODO: The below RUN line will fails GISEL selection and will fallback to DAG selection due to lack of support for loads/stores in i686 mode, support is expected soon enough, for this reason the llvm/test/CodeGen/X86/GlobalISel/llvm.sincos.mir test is added for now because of the lack of support for i686 in GlobalISel. ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 @@ -34,6 +37,29 @@ define { float, float } @test_sincos_f32(float %Val) nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; +; MACOS-SINCOS-STRET-LABEL: test_sincos_f32: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: pushq %rax +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; MACOS-SINCOS-STRET-NEXT: popq %rax +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f32: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: pushq %rax +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rsp) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movss (%rsp), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, %xmm1 +; MACOS-NOSINCOS-STRET-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: popq %rax +; MACOS-NOSINCOS-STRET-NEXT: retq +; ; GISEL-X86-LABEL: test_sincos_f32: ; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $28, %esp @@ -93,6 +119,28 @@ define { double, double } @test_sincos_f64(double %Val) nounwind { ; X64-NEXT: addq $24, %rsp ; X64-NEXT: retq ; +; MACOS-SINCOS-STRET-LABEL: test_sincos_f64: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: pushq %rax +; MACOS-SINCOS-STRET-NEXT: callq ___sincos_stret +; MACOS-SINCOS-STRET-NEXT: popq %rax +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f64: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: subq $24, %rsp +; MACOS-NOSINCOS-STRET-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _sin +; MACOS-NOSINCOS-STRET-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 8-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero +; MACOS-NOSINCOS-STRET-NEXT: callq _cos +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, %xmm1 +; MACOS-NOSINCOS-STRET-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 8-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero +; MACOS-NOSINCOS-STRET-NEXT: addq $24, %rsp +; MACOS-NOSINCOS-STRET-NEXT: retq +; ; GISEL-X86-LABEL: test_sincos_f64: ; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $44, %esp @@ -153,6 +201,40 @@ define { x86_fp80, x86_fp80 } @test_sincos_f80(x86_fp80 %Val) nounwind { ; X64-NEXT: addq $56, %rsp ; X64-NEXT: retq ; +; MACOS-SINCOS-STRET-LABEL: test_sincos_f80: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: subq $40, %rsp +; MACOS-SINCOS-STRET-NEXT: fldt {{[0-9]+}}(%rsp) +; MACOS-SINCOS-STRET-NEXT: fld %st(0) +; MACOS-SINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-SINCOS-STRET-NEXT: fstpt (%rsp) +; MACOS-SINCOS-STRET-NEXT: callq _cosl +; MACOS-SINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-SINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: fstpt (%rsp) +; MACOS-SINCOS-STRET-NEXT: callq _sinl +; MACOS-SINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: fxch %st(1) +; MACOS-SINCOS-STRET-NEXT: addq $40, %rsp +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f80: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: subq $40, %rsp +; MACOS-NOSINCOS-STRET-NEXT: fldt {{[0-9]+}}(%rsp) +; MACOS-NOSINCOS-STRET-NEXT: fld %st(0) +; MACOS-NOSINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-NOSINCOS-STRET-NEXT: fstpt (%rsp) +; MACOS-NOSINCOS-STRET-NEXT: callq _cosl +; MACOS-NOSINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill +; MACOS-NOSINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: fstpt (%rsp) +; MACOS-NOSINCOS-STRET-NEXT: callq _sinl +; MACOS-NOSINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: fxch %st(1) +; MACOS-NOSINCOS-STRET-NEXT: addq $40, %rsp +; MACOS-NOSINCOS-STRET-NEXT: retq +; ; GISEL-X86-LABEL: test_sincos_f80: ; GISEL-X86: # %bb.0: ; GISEL-X86-NEXT: subl $60, %esp @@ -288,6 +370,57 @@ define void @can_fold_with_call_in_chain(float %x, ptr noalias %a, ptr noalias % ; SDAG-X64-NEXT: popq %r14 ; SDAG-X64-NEXT: retq ; +; MACOS-SINCOS-STRET-LABEL: can_fold_with_call_in_chain: +; MACOS-SINCOS-STRET: ## %bb.0: ## %entry +; MACOS-SINCOS-STRET-NEXT: pushq %r14 +; MACOS-SINCOS-STRET-NEXT: pushq %rbx +; MACOS-SINCOS-STRET-NEXT: subq $40, %rsp +; MACOS-SINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-SINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movq %r14, %rdi +; MACOS-SINCOS-STRET-NEXT: movq %rbx, %rsi +; MACOS-SINCOS-STRET-NEXT: callq _foo +; MACOS-SINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movss %xmm0, (%r14) +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movss %xmm0, (%rbx) +; MACOS-SINCOS-STRET-NEXT: addq $40, %rsp +; MACOS-SINCOS-STRET-NEXT: popq %rbx +; MACOS-SINCOS-STRET-NEXT: popq %r14 +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: can_fold_with_call_in_chain: +; MACOS-NOSINCOS-STRET: ## %bb.0: ## %entry +; MACOS-NOSINCOS-STRET-NEXT: pushq %r14 +; MACOS-NOSINCOS-STRET-NEXT: pushq %rbx +; MACOS-NOSINCOS-STRET-NEXT: pushq %rax +; MACOS-NOSINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-NOSINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rsp) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movss (%rsp), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rsp) ## 4-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movq %r14, %rdi +; MACOS-NOSINCOS-STRET-NEXT: movq %rbx, %rsi +; MACOS-NOSINCOS-STRET-NEXT: callq _foo +; MACOS-NOSINCOS-STRET-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%r14) +; MACOS-NOSINCOS-STRET-NEXT: movss (%rsp), %xmm0 ## 4-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero +; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rbx) +; MACOS-NOSINCOS-STRET-NEXT: addq $8, %rsp +; MACOS-NOSINCOS-STRET-NEXT: popq %rbx +; MACOS-NOSINCOS-STRET-NEXT: popq %r14 +; MACOS-NOSINCOS-STRET-NEXT: retq +; ; GISEL-X86-LABEL: can_fold_with_call_in_chain: ; GISEL-X86: # %bb.0: # %entry ; GISEL-X86-NEXT: pushl %ebx diff --git a/llvm/test/CodeGen/X86/llvm.sincos.vec.ll b/llvm/test/CodeGen/X86/llvm.sincos.vec.ll index 834dd78..9b02438 100644 --- a/llvm/test/CodeGen/X86/llvm.sincos.vec.ll +++ b/llvm/test/CodeGen/X86/llvm.sincos.vec.ll @@ -1,59 +1,213 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 5 -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck -check-prefix=X86 %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck -check-prefix=X64 %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 | FileCheck --check-prefix=MACOS-SINCOS-STRET %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 | FileCheck --check-prefix=MACOS-NOSINCOS-STRET %s define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) nounwind { -; CHECK-LABEL: test_sincos_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $52, %esp -; CHECK-NEXT: movl 84(%esp), %esi -; CHECK-NEXT: flds 76(%esp) -; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: flds 64(%esp) -; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: flds 72(%esp) -; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: flds 68(%esp) -; CHECK-NEXT: movl 80(%esp), %edi -; CHECK-NEXT: leal 40(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: leal 4(%edi), %eax -; CHECK-NEXT: movl %eax, 4(%esp) -; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: calll sincosf -; CHECK-NEXT: leal 44(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: leal 8(%edi), %eax -; CHECK-NEXT: movl %eax, 4(%esp) -; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: calll sincosf -; CHECK-NEXT: leal 36(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: movl %edi, 4(%esp) -; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: calll sincosf -; CHECK-NEXT: leal 48(%esp), %eax -; CHECK-NEXT: movl %eax, 8(%esp) -; CHECK-NEXT: addl $12, %edi -; CHECK-NEXT: movl %edi, 4(%esp) -; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: calll sincosf -; CHECK-NEXT: flds 36(%esp) -; CHECK-NEXT: flds 40(%esp) -; CHECK-NEXT: flds 44(%esp) -; CHECK-NEXT: flds 48(%esp) -; CHECK-NEXT: fstps 12(%esi) -; CHECK-NEXT: fstps 8(%esi) -; CHECK-NEXT: fstps 4(%esi) -; CHECK-NEXT: fstps (%esi) -; CHECK-NEXT: addl $52, %esp -; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi -; CHECK-NEXT: retl +; X86-LABEL: test_sincos_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $52, %esp +; X86-NEXT: movl 84(%esp), %esi +; X86-NEXT: flds 76(%esp) +; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: flds 64(%esp) +; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: flds 72(%esp) +; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: flds 68(%esp) +; X86-NEXT: movl 80(%esp), %edi +; X86-NEXT: leal 40(%esp), %eax +; X86-NEXT: movl %eax, 8(%esp) +; X86-NEXT: leal 4(%edi), %eax +; X86-NEXT: movl %eax, 4(%esp) +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll sincosf +; X86-NEXT: leal 44(%esp), %eax +; X86-NEXT: movl %eax, 8(%esp) +; X86-NEXT: leal 8(%edi), %eax +; X86-NEXT: movl %eax, 4(%esp) +; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll sincosf +; X86-NEXT: leal 36(%esp), %eax +; X86-NEXT: movl %eax, 8(%esp) +; X86-NEXT: movl %edi, 4(%esp) +; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll sincosf +; X86-NEXT: leal 48(%esp), %eax +; X86-NEXT: movl %eax, 8(%esp) +; X86-NEXT: addl $12, %edi +; X86-NEXT: movl %edi, 4(%esp) +; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: fstps (%esp) +; X86-NEXT: calll sincosf +; X86-NEXT: flds 36(%esp) +; X86-NEXT: flds 40(%esp) +; X86-NEXT: flds 44(%esp) +; X86-NEXT: flds 48(%esp) +; X86-NEXT: fstps 12(%esi) +; X86-NEXT: fstps 8(%esi) +; X86-NEXT: fstps 4(%esi) +; X86-NEXT: fstps (%esi) +; X86-NEXT: addl $52, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: test_sincos_v4f32: +; X64: # %bb.0: +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $56, %rsp +; X64-NEXT: movq %rsi, %rbx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-NEXT: leaq 4(%rsp), %rdi +; X64-NEXT: movq %rsp, %rsi +; X64-NEXT: callq sincosf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: leaq 12(%rsp), %rdi +; X64-NEXT: leaq 8(%rsp), %rsi +; X64-NEXT: callq sincosf@PLT +; X64-NEXT: leaq 28(%rsp), %rdi +; X64-NEXT: leaq 24(%rsp), %rsi +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq sincosf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: leaq 20(%rsp), %rdi +; X64-NEXT: leaq 16(%rsp), %rsi +; X64-NEXT: callq sincosf@PLT +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X64-NEXT: movups %xmm1, (%r14) +; X64-NEXT: movups %xmm0, (%rbx) +; X64-NEXT: addq $56, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r14 +; X64-NEXT: retq +; +; MACOS-SINCOS-STRET-LABEL: test_sincos_v4f32: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: pushq %r14 +; MACOS-SINCOS-STRET-NEXT: pushq %rbx +; MACOS-SINCOS-STRET-NEXT: subq $104, %rsp +; MACOS-SINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-SINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT: unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, %xmm1 +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; MACOS-SINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; MACOS-SINCOS-STRET-NEXT: unpcklpd (%rsp), %xmm2 ## 16-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: ## xmm2 = xmm2[0],mem[0] +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload +; MACOS-SINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-SINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-SINCOS-STRET-NEXT: movups %xmm1, (%r14) +; MACOS-SINCOS-STRET-NEXT: movups %xmm2, (%rbx) +; MACOS-SINCOS-STRET-NEXT: addq $104, %rsp +; MACOS-SINCOS-STRET-NEXT: popq %rbx +; MACOS-SINCOS-STRET-NEXT: popq %r14 +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_v4f32: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: pushq %r14 +; MACOS-NOSINCOS-STRET-NEXT: pushq %rbx +; MACOS-NOSINCOS-STRET-NEXT: subq $104, %rsp +; MACOS-NOSINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-NOSINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cosf +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; MACOS-NOSINCOS-STRET-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm1 = xmm1[0],mem[0] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sinf +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; MACOS-NOSINCOS-STRET-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload +; MACOS-NOSINCOS-STRET-NEXT: ## xmm1 = xmm1[0],mem[0] +; MACOS-NOSINCOS-STRET-NEXT: movups %xmm1, (%r14) +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movups %xmm0, (%rbx) +; MACOS-NOSINCOS-STRET-NEXT: addq $104, %rsp +; MACOS-NOSINCOS-STRET-NEXT: popq %rbx +; MACOS-NOSINCOS-STRET-NEXT: popq %r14 +; MACOS-NOSINCOS-STRET-NEXT: retq %result = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %x) %result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0 %result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1 @@ -63,36 +217,120 @@ define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias } define void @test_sincos_v2f64(<2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) nounwind { -; CHECK-LABEL: test_sincos_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $52, %esp -; CHECK-NEXT: movl 84(%esp), %esi -; CHECK-NEXT: fldl 72(%esp) -; CHECK-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; CHECK-NEXT: fldl 64(%esp) -; CHECK-NEXT: movl 80(%esp), %edi -; CHECK-NEXT: leal 24(%esp), %eax -; CHECK-NEXT: movl %eax, 12(%esp) -; CHECK-NEXT: movl %edi, 8(%esp) -; CHECK-NEXT: fstpl (%esp) -; CHECK-NEXT: calll sincos -; CHECK-NEXT: leal 32(%esp), %eax -; CHECK-NEXT: movl %eax, 12(%esp) -; CHECK-NEXT: addl $8, %edi -; CHECK-NEXT: movl %edi, 8(%esp) -; CHECK-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload -; CHECK-NEXT: fstpl (%esp) -; CHECK-NEXT: calll sincos -; CHECK-NEXT: fldl 24(%esp) -; CHECK-NEXT: fldl 32(%esp) -; CHECK-NEXT: fstpl 8(%esi) -; CHECK-NEXT: fstpl (%esi) -; CHECK-NEXT: addl $52, %esp -; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi -; CHECK-NEXT: retl +; X86-LABEL: test_sincos_v2f64: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $52, %esp +; X86-NEXT: movl 84(%esp), %esi +; X86-NEXT: fldl 72(%esp) +; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; X86-NEXT: fldl 64(%esp) +; X86-NEXT: movl 80(%esp), %edi +; X86-NEXT: leal 24(%esp), %eax +; X86-NEXT: movl %eax, 12(%esp) +; X86-NEXT: movl %edi, 8(%esp) +; X86-NEXT: fstpl (%esp) +; X86-NEXT: calll sincos +; X86-NEXT: leal 32(%esp), %eax +; X86-NEXT: movl %eax, 12(%esp) +; X86-NEXT: addl $8, %edi +; X86-NEXT: movl %edi, 8(%esp) +; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; X86-NEXT: fstpl (%esp) +; X86-NEXT: calll sincos +; X86-NEXT: fldl 24(%esp) +; X86-NEXT: fldl 32(%esp) +; X86-NEXT: fstpl 8(%esi) +; X86-NEXT: fstpl (%esi) +; X86-NEXT: addl $52, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: test_sincos_v2f64: +; X64: # %bb.0: +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $56, %rsp +; X64-NEXT: movq %rsi, %rbx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: leaq 24(%rsp), %rdi +; X64-NEXT: leaq 16(%rsp), %rsi +; X64-NEXT: callq sincos@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: leaq 8(%rsp), %rdi +; X64-NEXT: movq %rsp, %rsi +; X64-NEXT: callq sincos@PLT +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; X64-NEXT: movups %xmm1, (%r14) +; X64-NEXT: movups %xmm0, (%rbx) +; X64-NEXT: addq $56, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r14 +; X64-NEXT: retq +; +; MACOS-SINCOS-STRET-LABEL: test_sincos_v2f64: +; MACOS-SINCOS-STRET: ## %bb.0: +; MACOS-SINCOS-STRET-NEXT: pushq %r14 +; MACOS-SINCOS-STRET-NEXT: pushq %rbx +; MACOS-SINCOS-STRET-NEXT: subq $56, %rsp +; MACOS-SINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-SINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: callq ___sincos_stret +; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-SINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-SINCOS-STRET-NEXT: callq ___sincos_stret +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] +; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-SINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-SINCOS-STRET-NEXT: movups %xmm1, (%r14) +; MACOS-SINCOS-STRET-NEXT: movups %xmm2, (%rbx) +; MACOS-SINCOS-STRET-NEXT: addq $56, %rsp +; MACOS-SINCOS-STRET-NEXT: popq %rbx +; MACOS-SINCOS-STRET-NEXT: popq %r14 +; MACOS-SINCOS-STRET-NEXT: retq +; +; MACOS-NOSINCOS-STRET-LABEL: test_sincos_v2f64: +; MACOS-NOSINCOS-STRET: ## %bb.0: +; MACOS-NOSINCOS-STRET-NEXT: pushq %r14 +; MACOS-NOSINCOS-STRET-NEXT: pushq %rbx +; MACOS-NOSINCOS-STRET-NEXT: subq $56, %rsp +; MACOS-NOSINCOS-STRET-NEXT: movq %rsi, %rbx +; MACOS-NOSINCOS-STRET-NEXT: movq %rdi, %r14 +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cos +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: callq _cos +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sin +; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: callq _sin +; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; MACOS-NOSINCOS-STRET-NEXT: movups %xmm1, (%r14) +; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; MACOS-NOSINCOS-STRET-NEXT: movups %xmm0, (%rbx) +; MACOS-NOSINCOS-STRET-NEXT: addq $56, %rsp +; MACOS-NOSINCOS-STRET-NEXT: popq %rbx +; MACOS-NOSINCOS-STRET-NEXT: popq %r14 +; MACOS-NOSINCOS-STRET-NEXT: retq %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %x) %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0 %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 diff --git a/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test b/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test new file mode 100755 index 0000000..aa3f6dc --- /dev/null +++ b/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test @@ -0,0 +1,13 @@ +; Test that the native PDB reader isn't crashed by index value bigger than +; number of types in TPI or IPI stream +; RUN: llvm-pdbutil dump %p/../Inputs/empty.pdb --type-index=20000000\ +; RUN: | FileCheck -check-prefixes=TYPES,NOT_FOUND %s +; RUN: llvm-pdbutil dump %p/../Inputs/empty.pdb --id-index=20000000\ +; RUN: | FileCheck -check-prefixes=IDS,NOT_FOUND %s + +TYPES: Types (TPI Stream) +IDS: Types (IPI Stream) +NOT_FOUND:============================================================ +NOT_FOUND: Showing 1 records. +NOT_FOUND: Type 0x1312D00 doesn't exist in TPI stream + diff --git a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll index 14ee00d..2763860 100644 --- a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll +++ b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll @@ -114,7 +114,7 @@ define i32 @urem_order1(i32 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @foo() -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 3 +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 3 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT_LOOPEXIT]]: @@ -205,13 +205,12 @@ define i64 @test_loop_with_div_order_1(i64 %n) { ; CHECK-NEXT: [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0 ; CHECK-NEXT: br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]] ; CHECK: [[LOOP_PREHEADER]]: -; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[UPPER_BOUND]], i64 1) ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[DUMMY:%.*]] = load volatile i64, ptr null, align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UMAX]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UPPER_BOUND]] ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index bfee39ea..068f82c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -365,8 +365,8 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = alloca i8, i64 0, align 16 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll index ea01489..0a9494e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll @@ -10,8 +10,8 @@ define void @licm_replicate_call(double %x, ptr %dst) { ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00) -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 157b787..3558957 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -64,9 +64,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] ; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]] -; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP5]], i32 1 +; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]] +; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 +; TFCOMMON-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer ; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFCOMMON-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 @@ -79,7 +79,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TFCOMMON-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]] ; TFCOMMON: pred.store.if1: -; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1 +; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0 ; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8 ; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]] ; TFCOMMON: pred.store.continue2: @@ -105,9 +105,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]] -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0 -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1 +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]] +; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer ; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 @@ -120,7 +120,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] ; TFA_INTERLEAVE: pred.store.if3: -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 ; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]] ; TFA_INTERLEAVE: pred.store.continue4: @@ -134,7 +134,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] ; TFA_INTERLEAVE: pred.store.if7: -; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 +; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 ; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]] ; TFA_INTERLEAVE: pred.store.continue8: diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll index 03087bb..4590dfc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll @@ -199,10 +199,8 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) { ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 7 ; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[A]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> [[TMP8]], i1 [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP9]], i1 [[TMP10]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 3 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll new file mode 100644 index 0000000..8615401 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s + +define void @hoist_invariant_load_noalias_due_to_memchecks(ptr %dst, ptr %invariant_ptr, i32 %n) { +; CHECK-LABEL: define void @hoist_invariant_load_noalias_due_to_memchecks( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[INVARIANT_PTR:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 4 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[INV_VAL:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT: store i32 [[INV_VAL]], ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %inv_val = load i32, ptr %invariant_ptr, align 4 + %gep = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 %inv_val, ptr %gep, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +; Test that loads with non-invariant addresses are not hoisted. +define void @dont_hoist_variant_address(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @dont_hoist_variant_address( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64 +; CHECK-NEXT: [[A1:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[SRC2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv + %val = load i32, ptr %gep.src, align 4 + %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 %val, ptr %gep.dst, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +; Test that predicated loads are not hoisted. +define void @dont_hoist_predicated_load(ptr %dst, ptr %invariant_ptr, ptr %cond_ptr, i32 %n) { +; CHECK-LABEL: define void @dont_hoist_predicated_load( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[INVARIANT_PTR:%.*]], ptr [[COND_PTR:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[TMP20]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP22]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND_PTR]], i64 [[TMP3]] +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 4 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND_PTR]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND_PTR]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META11:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP9]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] +; CHECK: [[PRED_STORE_IF6]]: +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]] +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP8]] +; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4, !alias.scope [[META16]], !noalias [[META18]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] +; CHECK: [[PRED_STORE_CONTINUE7]]: +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] +; CHECK: [[PRED_STORE_IF8]]: +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]] +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP12]] +; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP17]], align 4, !alias.scope [[META16]], !noalias [[META18]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] +; CHECK: [[PRED_STORE_CONTINUE9]]: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] +; CHECK: [[PRED_STORE_IF10]]: +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]] +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]] +; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP21]], align 4, !alias.scope [[META16]], !noalias [[META18]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] +; CHECK: [[PRED_STORE_CONTINUE11]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_COND:%.*]] = getelementptr inbounds i32, ptr [[COND_PTR]], i32 [[IV]] +; CHECK-NEXT: [[COND:%.*]] = load i32, ptr [[GEP_COND]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[COND]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[LOOP_LATCH]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[INV_VAL:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT: store i32 [[INV_VAL]], ptr [[GEP]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.cond = getelementptr inbounds i32, ptr %cond_ptr, i32 %iv + %cond = load i32, ptr %gep.cond, align 4 + %cmp = icmp sgt i32 %cond, 0 + br i1 %cmp, label %if.then, label %loop.latch + +if.then: + %inv_val = load i32, ptr %invariant_ptr, align 4 + %gep = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 %inv_val, ptr %gep, align 4 + br label %loop.latch + +loop.latch: + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll new file mode 100644 index 0000000..857b913 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll @@ -0,0 +1,100 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s + +define void @test_widen_metadata(ptr noalias %A, ptr noalias %B, i32 %n) { +; CHECK-LABEL: Checking a loop in 'test_widen_metadata' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK: <x1> vector loop: { +; CHECK: vector.body: +; CHECK: WIDEN ir<%lv> = load vp<{{.*}}> +; CHECK: WIDEN-CAST ir<%conv> = sitofp ir<%lv> to float +; CHECK: WIDEN ir<%mul> = fmul ir<%conv>, ir<2.000000e+00> +; CHECK: WIDEN-CAST ir<%conv.back> = fptosi ir<%mul> to i32 +; CHECK: WIDEN store vp<{{.*}}>, ir<%conv.back> +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + %lv = load i32, ptr %gep.A, align 4, !tbaa !0, !range !6 + %conv = sitofp i32 %lv to float, !fpmath !5 + %mul = fmul float %conv, 2.0, !fpmath !5 + %conv.back = fptosi float %mul to i32 + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + store i32 %conv.back, ptr %gep.B, align 4, !tbaa !0 + %i.next = add i32 %i, 1 + %cond = icmp eq i32 %i.next, %n + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +declare float @llvm.sqrt.f32(float) + +define void @test_intrinsic_with_metadata(ptr noalias %A, ptr noalias %B, i32 %n) { +; CHECK-LABEL: Checking a loop in 'test_intrinsic_with_metadata' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK: <x1> vector loop: { +; CHECK: vector.body: +; CHECK: WIDEN ir<%lv> = load vp<{{.*}}> +; CHECK: WIDEN-INTRINSIC ir<%sqrt> = call llvm.sqrt(ir<%lv>) +; CHECK: WIDEN store vp<{{.*}}>, ir<%sqrt> +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %gep.A = getelementptr inbounds float, ptr %A, i32 %i + %lv = load float, ptr %gep.A, align 4, !tbaa !0 + %sqrt = call float @llvm.sqrt.f32(float %lv), !fpmath !5 + %gep.B = getelementptr inbounds float, ptr %B, i32 %i + store float %sqrt, ptr %gep.B, align 4, !tbaa !0 + %i.next = add i32 %i, 1 + %cond = icmp eq i32 %i.next, %n + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +define void @test_widen_with_multiple_metadata(ptr noalias %A, ptr noalias %B, i32 %n) { +; CHECK-LABEL: Checking a loop in 'test_widen_with_multiple_metadata' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK: <x1> vector loop: { +; CHECK: vector.body: +; CHECK: WIDEN ir<%lv> = load vp<{{.*}}> +; CHECK: WIDEN-CAST ir<%conv> = sitofp ir<%lv> to float +; CHECK: WIDEN ir<%mul> = fmul ir<%conv>, ir<2.000000e+00> +; CHECK: WIDEN-CAST ir<%conv.back> = fptosi ir<%mul> to i32 +; CHECK: WIDEN store vp<{{.*}}>, ir<%conv.back> +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + %lv = load i32, ptr %gep.A, align 4, !tbaa !0, !range !6 + %conv = sitofp i32 %lv to float + %mul = fmul float %conv, 2.0 + %conv.back = fptosi float %mul to i32 + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + store i32 %conv.back, ptr %gep.B, align 4, !tbaa !0 + %i.next = add i32 %i, 1 + %cond = icmp eq i32 %i.next, %n + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +!0 = !{!1, !1, i64 0} +!1 = !{!"float", !2} +!2 = !{!"root"} +!5 = !{float 2.500000e+00} +!6 = !{i32 0, i32 100} diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll new file mode 100644 index 0000000..a35bcf1 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -passes='default<O3>' -S %s | FileCheck %s + +target triple = "arm64-apple-macosx" + +%"class.dealii::VectorizedArray" = type { [4 x double] } + +define void @hoist_invariant_load(ptr %invariant_ptr, i64 %num_elements, ptr %array) { +; CHECK-LABEL: define void @hoist_invariant_load( +; CHECK-SAME: ptr readonly captures(none) [[INVARIANT_PTR:%.*]], i64 [[NUM_ELEMENTS:%.*]], ptr captures(none) [[ARRAY:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUM_ELEMENTS]], 0 +; CHECK-NEXT: br i1 [[CMP1_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH:.*]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[I2:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nusw %"class.dealii::VectorizedArray", ptr [[ARRAY]], i64 [[I2]] +; CHECK-NEXT: [[INVARIANT_VAL:%.*]] = load double, ptr [[INVARIANT_PTR]], align 8 +; CHECK-NEXT: [[ARRAY_VAL:%.*]] = load double, ptr [[GEP]], align 8 +; CHECK-NEXT: [[SUM:%.*]] = fadd double [[INVARIANT_VAL]], [[ARRAY_VAL]] +; CHECK-NEXT: store double [[SUM]], ptr [[GEP]], align 8 +; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I2]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[NUM_ELEMENTS]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_LATCH]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: ; preds = %loop.latch, %entry + %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] + %cmp = icmp ult i64 %i, %num_elements + br i1 %cmp, label %loop.latch, label %exit + +loop.latch: ; preds = %loop.header + %gep = getelementptr nusw %"class.dealii::VectorizedArray", ptr %array, i64 %i + %invariant_val = load double, ptr %invariant_ptr, align 8 + %array_val = load double, ptr %gep, align 8 + %sum = fadd double %array_val, %invariant_val + store double %sum, ptr %gep, align 8 + %i.next = add i64 %i, 1 + br label %loop.header + +exit: ; preds = %loop.header + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll index d16843c..6629b12 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s -define ptr @test(ptr %d) { +define ptr @test(ptr %d, i64 %v) { ; CHECK-LABEL: define ptr @test( -; CHECK-SAME: ptr [[D:%.*]]) { +; CHECK-SAME: ptr [[D:%.*]], i64 [[V:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[D]], align 1 ; CHECK-NEXT: [[CMP4_2:%.*]] = icmp eq i8 [[TMP0]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, 0 -; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 4 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, [[V]] +; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, [[V]] ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i64> poison, i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i64> [[TMP5]], i64 [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x i64> [[TMP6]], i64 [[TMP4]], i32 4 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <6 x i64> [[TMP7]], <6 x i64> poison, <6 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4> -; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 1, i64 1, i64 1, i64 0> +; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 4, i64 3, i64 5, i64 4> ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <6 x i64> [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <6 x i64> [[TMP9]], i32 1 @@ -31,23 +31,23 @@ define ptr @test(ptr %d) { ; CHECK-NEXT: ret ptr [[TMP20]] ; entry: - %0 = load i8, ptr null, align 1 + %0 = load i8, ptr %d, align 1 %cmp4.2 = icmp eq i8 %0, 0 - %1 = select i1 %cmp4.2, i64 0, i64 0 + %1 = select i1 %cmp4.2, i64 0, i64 4 %2 = shl i64 %1, 1 %3 = getelementptr i8, ptr %d, i64 %2 - %4 = xor i64 0, 0 - %5 = udiv i64 %4, 0 + %4 = xor i64 0, %v + %5 = udiv i64 %4, 3 %6 = mul i64 %5, 6 %7 = getelementptr i8, ptr %d, i64 %6 - %8 = shl i64 %1, 0 + %8 = shl i64 %1, 2 %scevgep42 = getelementptr i8, ptr %d, i64 %8 - %9 = mul i64 %5, 1 + %9 = mul i64 %5, 3 %10 = getelementptr i8, ptr %d, i64 %9 - %11 = udiv i64 1, 0 - %12 = mul i64 %11, 1 + %11 = udiv i64 1, %v + %12 = mul i64 %11, 5 %13 = getelementptr i8, ptr %d, i64 %12 - %14 = mul i64 %11, 0 + %14 = mul i64 %11, 4 %15 = getelementptr i8, ptr %d, i64 %14 ret ptr %15 } diff --git a/llvm/test/tools/llvm-config/paths.test b/llvm/test/tools/llvm-config/paths.test index 419f155..61d86f7 100644 --- a/llvm/test/tools/llvm-config/paths.test +++ b/llvm/test/tools/llvm-config/paths.test @@ -4,18 +4,34 @@ RUN: llvm-config --bindir 2>&1 | FileCheck --check-prefix=CHECK-BINDIR %s CHECK-BINDIR: {{.*}}{{/|\\}}bin CHECK-BINDIR-NOT: error: CHECK-BINDIR-NOT: warning +RUN: llvm-config --bindir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-BINDIR2 %s +CHECK-BINDIR2: {{.*}}{{/|\\\\}}bin +CHECK-BINDIR2-NOT: error: +CHECK-BINDIR2-NOT: warning RUN: llvm-config --includedir 2>&1 | FileCheck --check-prefix=CHECK-INCLUDEDIR %s CHECK-INCLUDEDIR: {{.*}}{{/|\\}}include CHECK-INCLUDEDIR-NOT: error: CHECK-INCLUDEDIR-NOT: warning +RUN: llvm-config --includedir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-INCLUDEDIR2 %s +CHECK-INCLUDEDIR2: {{.*}}{{/|\\\\}}include +CHECK-INCLUDEDIR2-NOT: error: +CHECK-INCLUDEDIR2-NOT: warning RUN: llvm-config --libdir 2>&1 | FileCheck --check-prefix=CHECK-LIBDIR %s CHECK-LIBDIR: {{.*}}{{/|\\}}lib{{.*}} CHECK-LIBDIR-NOT: error: CHECK-LIBDIR-NOT: warning +RUN: llvm-config --libdir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-LIBDIR2 %s +CHECK-LIBDIR2: {{.*}}{{/|\\\\}}lib{{.*}} +CHECK-LIBDIR2-NOT: error: +CHECK-LIBDIR2-NOT: warning RUN: llvm-config --cmakedir 2>&1 | FileCheck --check-prefix=CHECK-CMAKEDIR %s CHECK-CMAKEDIR: {{.*}}{{/|\\}}cmake{{/|\\}}llvm CHECK-CMAKEDIR-NOT: error: CHECK-CMAKEDIR-NOT: warning +RUN: llvm-config --cmakedir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-CMAKEDIR2 %s +CHECK-CMAKEDIR2: {{.*}}{{/|\\\\}}cmake{{/|\\\\}}llvm +CHECK-CMAKEDIR2-NOT: error: +CHECK-CMAKEDIR2-NOT: warning |
