aboutsummaryrefslogtreecommitdiff
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll17
-rw-r--r--llvm/test/CodeGen/NVPTX/f16-ex2.ll40
-rw-r--r--llvm/test/CodeGen/NVPTX/f32-ex2.ll7
-rw-r--r--llvm/test/CodeGen/X86/isel-llvm.sincos.ll133
-rw-r--r--llvm/test/CodeGen/X86/llvm.sincos.vec.ll404
-rwxr-xr-xllvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test13
-rw-r--r--llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll5
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll4
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll4
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll18
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll6
-rw-r--r--llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll247
-rw-r--r--llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll100
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll46
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll34
-rw-r--r--llvm/test/tools/llvm-config/paths.test16
16 files changed, 965 insertions, 129 deletions
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 362586a..4fc506f 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -87,6 +87,11 @@ declare void @llvm.nvvm.barrier(i32, i32)
declare void @llvm.nvvm.barrier.sync(i32)
declare void @llvm.nvvm.barrier.sync.cnt(i32, i32)
+declare float @llvm.nvvm.ex2.approx.f(float)
+declare double @llvm.nvvm.ex2.approx.d(double)
+declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>)
+declare float @llvm.nvvm.ex2.approx.ftz.f(float)
+
; CHECK-LABEL: @simple_upgrade
define void @simple_upgrade(i32 %a, i64 %b, i16 %c) {
; CHECK: call i32 @llvm.bitreverse.i32(i32 %a)
@@ -355,3 +360,15 @@ define void @cta_barriers(i32 %x, i32 %y) {
call void @llvm.nvvm.barrier.sync.cnt(i32 %x, i32 %y)
ret void
}
+
+define void @nvvm_ex2_approx(float %a, double %b, half %c, <2 x half> %d) {
+; CHECK: call float @llvm.nvvm.ex2.approx.f32(float %a)
+; CHECK: call double @llvm.nvvm.ex2.approx.f64(double %b)
+; CHECK: call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %d)
+; CHECK: call float @llvm.nvvm.ex2.approx.ftz.f32(float %a)
+ %r1 = call float @llvm.nvvm.ex2.approx.f(float %a)
+ %r2 = call double @llvm.nvvm.ex2.approx.d(double %b)
+ %r3 = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %d)
+ %r4 = call float @llvm.nvvm.ex2.approx.ftz.f(float %a)
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/f16-ex2.ll b/llvm/test/CodeGen/NVPTX/f16-ex2.ll
index ee79f9d..af3fe67 100644
--- a/llvm/test/CodeGen/NVPTX/f16-ex2.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-ex2.ll
@@ -1,12 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s
-; RUN: %if ptxas-sm_75 && ptxas-isa-7.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-FP16 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
target triple = "nvptx64-nvidia-cuda"
declare half @llvm.nvvm.ex2.approx.f16(half)
-declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>)
+declare <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half>)
+declare bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat)
+declare <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat>)
-; CHECK-LABEL: ex2_half
define half @ex2_half(half %0) {
; CHECK-FP16-LABEL: ex2_half(
; CHECK-FP16: {
@@ -21,7 +22,6 @@ define half @ex2_half(half %0) {
ret half %res
}
-; CHECK-LABEL: ex2_2xhalf
define <2 x half> @ex2_2xhalf(<2 x half> %0) {
; CHECK-FP16-LABEL: ex2_2xhalf(
; CHECK-FP16: {
@@ -32,6 +32,34 @@ define <2 x half> @ex2_2xhalf(<2 x half> %0) {
; CHECK-FP16-NEXT: ex2.approx.f16x2 %r2, %r1;
; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-FP16-NEXT: ret;
- %res = call <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half> %0)
+ %res = call <2 x half> @llvm.nvvm.ex2.approx.v2f16(<2 x half> %0)
ret <2 x half> %res
}
+
+define bfloat @ex2_bfloat(bfloat %0) {
+; CHECK-FP16-LABEL: ex2_bfloat(
+; CHECK-FP16: {
+; CHECK-FP16-NEXT: .reg .b16 %rs<3>;
+; CHECK-FP16-EMPTY:
+; CHECK-FP16-NEXT: // %bb.0:
+; CHECK-FP16-NEXT: ld.param.b16 %rs1, [ex2_bfloat_param_0];
+; CHECK-FP16-NEXT: ex2.approx.ftz.bf16 %rs2, %rs1;
+; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs2;
+; CHECK-FP16-NEXT: ret;
+ %res = call bfloat @llvm.nvvm.ex2.approx.ftz.bf16(bfloat %0)
+ ret bfloat %res
+}
+
+define <2 x bfloat> @ex2_2xbfloat(<2 x bfloat> %0) {
+; CHECK-FP16-LABEL: ex2_2xbfloat(
+; CHECK-FP16: {
+; CHECK-FP16-NEXT: .reg .b32 %r<3>;
+; CHECK-FP16-EMPTY:
+; CHECK-FP16-NEXT: // %bb.0:
+; CHECK-FP16-NEXT: ld.param.b32 %r1, [ex2_2xbfloat_param_0];
+; CHECK-FP16-NEXT: ex2.approx.ftz.bf16x2 %r2, %r1;
+; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-FP16-NEXT: ret;
+ %res = call <2 x bfloat> @llvm.nvvm.ex2.approx.ftz.v2bf16(<2 x bfloat> %0)
+ ret <2 x bfloat> %res
+}
diff --git a/llvm/test/CodeGen/NVPTX/f32-ex2.ll b/llvm/test/CodeGen/NVPTX/f32-ex2.ll
index 796d80d..97b9d35 100644
--- a/llvm/test/CodeGen/NVPTX/f32-ex2.ll
+++ b/llvm/test/CodeGen/NVPTX/f32-ex2.ll
@@ -3,7 +3,8 @@
; RUN: %if ptxas-sm_50 && ptxas-isa-3.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_50 -mattr=+ptx32 | %ptxas-verify -arch=sm_50 %}
target triple = "nvptx-nvidia-cuda"
-declare float @llvm.nvvm.ex2.approx.f(float)
+declare float @llvm.nvvm.ex2.approx.f32(float)
+declare float @llvm.nvvm.ex2.approx.ftz.f32(float)
; CHECK-LABEL: ex2_float
define float @ex2_float(float %0) {
@@ -16,7 +17,7 @@ define float @ex2_float(float %0) {
; CHECK-NEXT: ex2.approx.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %res = call float @llvm.nvvm.ex2.approx.f(float %0)
+ %res = call float @llvm.nvvm.ex2.approx.f32(float %0)
ret float %res
}
@@ -31,6 +32,6 @@ define float @ex2_float_ftz(float %0) {
; CHECK-NEXT: ex2.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %res = call float @llvm.nvvm.ex2.approx.ftz.f(float %0)
+ %res = call float @llvm.nvvm.ex2.approx.ftz.f32(float %0)
ret float %res
}
diff --git a/llvm/test/CodeGen/X86/isel-llvm.sincos.ll b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll
index 065710f..8576f8f 100644
--- a/llvm/test/CodeGen/X86/isel-llvm.sincos.ll
+++ b/llvm/test/CodeGen/X86/isel-llvm.sincos.ll
@@ -3,6 +3,9 @@
; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=0 -fast-isel=0 | FileCheck %s --check-prefixes=X86,SDAG-X86
; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=0 -fast-isel=0 | FileCheck %s --check-prefixes=X64,SDAG-X64
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=MACOS-SINCOS-STRET
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=MACOS-NOSINCOS-STRET
+
; TODO: The below RUN line will fails GISEL selection and will fallback to DAG selection due to lack of support for loads/stores in i686 mode, support is expected soon enough, for this reason the llvm/test/CodeGen/X86/GlobalISel/llvm.sincos.mir test is added for now because of the lack of support for i686 in GlobalISel.
; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X86
; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
@@ -34,6 +37,29 @@ define { float, float } @test_sincos_f32(float %Val) nounwind {
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
+; MACOS-SINCOS-STRET-LABEL: test_sincos_f32:
+; MACOS-SINCOS-STRET: ## %bb.0:
+; MACOS-SINCOS-STRET-NEXT: pushq %rax
+; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret
+; MACOS-SINCOS-STRET-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; MACOS-SINCOS-STRET-NEXT: popq %rax
+; MACOS-SINCOS-STRET-NEXT: retq
+;
+; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f32:
+; MACOS-NOSINCOS-STRET: ## %bb.0:
+; MACOS-NOSINCOS-STRET-NEXT: pushq %rax
+; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rsp) ## 4-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: callq _sinf
+; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movss (%rsp), %xmm0 ## 4-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero
+; MACOS-NOSINCOS-STRET-NEXT: callq _cosf
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, %xmm1
+; MACOS-NOSINCOS-STRET-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero
+; MACOS-NOSINCOS-STRET-NEXT: popq %rax
+; MACOS-NOSINCOS-STRET-NEXT: retq
+;
; GISEL-X86-LABEL: test_sincos_f32:
; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: subl $28, %esp
@@ -93,6 +119,28 @@ define { double, double } @test_sincos_f64(double %Val) nounwind {
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
;
+; MACOS-SINCOS-STRET-LABEL: test_sincos_f64:
+; MACOS-SINCOS-STRET: ## %bb.0:
+; MACOS-SINCOS-STRET-NEXT: pushq %rax
+; MACOS-SINCOS-STRET-NEXT: callq ___sincos_stret
+; MACOS-SINCOS-STRET-NEXT: popq %rax
+; MACOS-SINCOS-STRET-NEXT: retq
+;
+; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f64:
+; MACOS-NOSINCOS-STRET: ## %bb.0:
+; MACOS-NOSINCOS-STRET-NEXT: subq $24, %rsp
+; MACOS-NOSINCOS-STRET-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: callq _sin
+; MACOS-NOSINCOS-STRET-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 8-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero
+; MACOS-NOSINCOS-STRET-NEXT: callq _cos
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, %xmm1
+; MACOS-NOSINCOS-STRET-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 8-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero
+; MACOS-NOSINCOS-STRET-NEXT: addq $24, %rsp
+; MACOS-NOSINCOS-STRET-NEXT: retq
+;
; GISEL-X86-LABEL: test_sincos_f64:
; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: subl $44, %esp
@@ -153,6 +201,40 @@ define { x86_fp80, x86_fp80 } @test_sincos_f80(x86_fp80 %Val) nounwind {
; X64-NEXT: addq $56, %rsp
; X64-NEXT: retq
;
+; MACOS-SINCOS-STRET-LABEL: test_sincos_f80:
+; MACOS-SINCOS-STRET: ## %bb.0:
+; MACOS-SINCOS-STRET-NEXT: subq $40, %rsp
+; MACOS-SINCOS-STRET-NEXT: fldt {{[0-9]+}}(%rsp)
+; MACOS-SINCOS-STRET-NEXT: fld %st(0)
+; MACOS-SINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill
+; MACOS-SINCOS-STRET-NEXT: fstpt (%rsp)
+; MACOS-SINCOS-STRET-NEXT: callq _cosl
+; MACOS-SINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill
+; MACOS-SINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload
+; MACOS-SINCOS-STRET-NEXT: fstpt (%rsp)
+; MACOS-SINCOS-STRET-NEXT: callq _sinl
+; MACOS-SINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload
+; MACOS-SINCOS-STRET-NEXT: fxch %st(1)
+; MACOS-SINCOS-STRET-NEXT: addq $40, %rsp
+; MACOS-SINCOS-STRET-NEXT: retq
+;
+; MACOS-NOSINCOS-STRET-LABEL: test_sincos_f80:
+; MACOS-NOSINCOS-STRET: ## %bb.0:
+; MACOS-NOSINCOS-STRET-NEXT: subq $40, %rsp
+; MACOS-NOSINCOS-STRET-NEXT: fldt {{[0-9]+}}(%rsp)
+; MACOS-NOSINCOS-STRET-NEXT: fld %st(0)
+; MACOS-NOSINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill
+; MACOS-NOSINCOS-STRET-NEXT: fstpt (%rsp)
+; MACOS-NOSINCOS-STRET-NEXT: callq _cosl
+; MACOS-NOSINCOS-STRET-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Spill
+; MACOS-NOSINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload
+; MACOS-NOSINCOS-STRET-NEXT: fstpt (%rsp)
+; MACOS-NOSINCOS-STRET-NEXT: callq _sinl
+; MACOS-NOSINCOS-STRET-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) ## 10-byte Folded Reload
+; MACOS-NOSINCOS-STRET-NEXT: fxch %st(1)
+; MACOS-NOSINCOS-STRET-NEXT: addq $40, %rsp
+; MACOS-NOSINCOS-STRET-NEXT: retq
+;
; GISEL-X86-LABEL: test_sincos_f80:
; GISEL-X86: # %bb.0:
; GISEL-X86-NEXT: subl $60, %esp
@@ -288,6 +370,57 @@ define void @can_fold_with_call_in_chain(float %x, ptr noalias %a, ptr noalias %
; SDAG-X64-NEXT: popq %r14
; SDAG-X64-NEXT: retq
;
+; MACOS-SINCOS-STRET-LABEL: can_fold_with_call_in_chain:
+; MACOS-SINCOS-STRET: ## %bb.0: ## %entry
+; MACOS-SINCOS-STRET-NEXT: pushq %r14
+; MACOS-SINCOS-STRET-NEXT: pushq %rbx
+; MACOS-SINCOS-STRET-NEXT: subq $40, %rsp
+; MACOS-SINCOS-STRET-NEXT: movq %rsi, %rbx
+; MACOS-SINCOS-STRET-NEXT: movq %rdi, %r14
+; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: movq %r14, %rdi
+; MACOS-SINCOS-STRET-NEXT: movq %rbx, %rsi
+; MACOS-SINCOS-STRET-NEXT: callq _foo
+; MACOS-SINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: movss %xmm0, (%r14)
+; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: movss %xmm0, (%rbx)
+; MACOS-SINCOS-STRET-NEXT: addq $40, %rsp
+; MACOS-SINCOS-STRET-NEXT: popq %rbx
+; MACOS-SINCOS-STRET-NEXT: popq %r14
+; MACOS-SINCOS-STRET-NEXT: retq
+;
+; MACOS-NOSINCOS-STRET-LABEL: can_fold_with_call_in_chain:
+; MACOS-NOSINCOS-STRET: ## %bb.0: ## %entry
+; MACOS-NOSINCOS-STRET-NEXT: pushq %r14
+; MACOS-NOSINCOS-STRET-NEXT: pushq %rbx
+; MACOS-NOSINCOS-STRET-NEXT: pushq %rax
+; MACOS-NOSINCOS-STRET-NEXT: movq %rsi, %rbx
+; MACOS-NOSINCOS-STRET-NEXT: movq %rdi, %r14
+; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rsp) ## 4-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: callq _sinf
+; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movss (%rsp), %xmm0 ## 4-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero
+; MACOS-NOSINCOS-STRET-NEXT: callq _cosf
+; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rsp) ## 4-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movq %r14, %rdi
+; MACOS-NOSINCOS-STRET-NEXT: movq %rbx, %rsi
+; MACOS-NOSINCOS-STRET-NEXT: callq _foo
+; MACOS-NOSINCOS-STRET-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero
+; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%r14)
+; MACOS-NOSINCOS-STRET-NEXT: movss (%rsp), %xmm0 ## 4-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = mem[0],zero,zero,zero
+; MACOS-NOSINCOS-STRET-NEXT: movss %xmm0, (%rbx)
+; MACOS-NOSINCOS-STRET-NEXT: addq $8, %rsp
+; MACOS-NOSINCOS-STRET-NEXT: popq %rbx
+; MACOS-NOSINCOS-STRET-NEXT: popq %r14
+; MACOS-NOSINCOS-STRET-NEXT: retq
+;
; GISEL-X86-LABEL: can_fold_with_call_in_chain:
; GISEL-X86: # %bb.0: # %entry
; GISEL-X86-NEXT: pushl %ebx
diff --git a/llvm/test/CodeGen/X86/llvm.sincos.vec.ll b/llvm/test/CodeGen/X86/llvm.sincos.vec.ll
index 834dd78..9b02438 100644
--- a/llvm/test/CodeGen/X86/llvm.sincos.vec.ll
+++ b/llvm/test/CodeGen/X86/llvm.sincos.vec.ll
@@ -1,59 +1,213 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 5
-; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck -check-prefix=X86 %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck -check-prefix=X64 %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 | FileCheck --check-prefix=MACOS-SINCOS-STRET %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 | FileCheck --check-prefix=MACOS-NOSINCOS-STRET %s
define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) nounwind {
-; CHECK-LABEL: test_sincos_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushl %edi
-; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: subl $52, %esp
-; CHECK-NEXT: movl 84(%esp), %esi
-; CHECK-NEXT: flds 76(%esp)
-; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; CHECK-NEXT: flds 64(%esp)
-; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; CHECK-NEXT: flds 72(%esp)
-; CHECK-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; CHECK-NEXT: flds 68(%esp)
-; CHECK-NEXT: movl 80(%esp), %edi
-; CHECK-NEXT: leal 40(%esp), %eax
-; CHECK-NEXT: movl %eax, 8(%esp)
-; CHECK-NEXT: leal 4(%edi), %eax
-; CHECK-NEXT: movl %eax, 4(%esp)
-; CHECK-NEXT: fstps (%esp)
-; CHECK-NEXT: calll sincosf
-; CHECK-NEXT: leal 44(%esp), %eax
-; CHECK-NEXT: movl %eax, 8(%esp)
-; CHECK-NEXT: leal 8(%edi), %eax
-; CHECK-NEXT: movl %eax, 4(%esp)
-; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; CHECK-NEXT: fstps (%esp)
-; CHECK-NEXT: calll sincosf
-; CHECK-NEXT: leal 36(%esp), %eax
-; CHECK-NEXT: movl %eax, 8(%esp)
-; CHECK-NEXT: movl %edi, 4(%esp)
-; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; CHECK-NEXT: fstps (%esp)
-; CHECK-NEXT: calll sincosf
-; CHECK-NEXT: leal 48(%esp), %eax
-; CHECK-NEXT: movl %eax, 8(%esp)
-; CHECK-NEXT: addl $12, %edi
-; CHECK-NEXT: movl %edi, 4(%esp)
-; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; CHECK-NEXT: fstps (%esp)
-; CHECK-NEXT: calll sincosf
-; CHECK-NEXT: flds 36(%esp)
-; CHECK-NEXT: flds 40(%esp)
-; CHECK-NEXT: flds 44(%esp)
-; CHECK-NEXT: flds 48(%esp)
-; CHECK-NEXT: fstps 12(%esi)
-; CHECK-NEXT: fstps 8(%esi)
-; CHECK-NEXT: fstps 4(%esi)
-; CHECK-NEXT: fstps (%esi)
-; CHECK-NEXT: addl $52, %esp
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: popl %edi
-; CHECK-NEXT: retl
+; X86-LABEL: test_sincos_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $52, %esp
+; X86-NEXT: movl 84(%esp), %esi
+; X86-NEXT: flds 76(%esp)
+; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: flds 64(%esp)
+; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: flds 72(%esp)
+; X86-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: flds 68(%esp)
+; X86-NEXT: movl 80(%esp), %edi
+; X86-NEXT: leal 40(%esp), %eax
+; X86-NEXT: movl %eax, 8(%esp)
+; X86-NEXT: leal 4(%edi), %eax
+; X86-NEXT: movl %eax, 4(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll sincosf
+; X86-NEXT: leal 44(%esp), %eax
+; X86-NEXT: movl %eax, 8(%esp)
+; X86-NEXT: leal 8(%edi), %eax
+; X86-NEXT: movl %eax, 4(%esp)
+; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll sincosf
+; X86-NEXT: leal 36(%esp), %eax
+; X86-NEXT: movl %eax, 8(%esp)
+; X86-NEXT: movl %edi, 4(%esp)
+; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll sincosf
+; X86-NEXT: leal 48(%esp), %eax
+; X86-NEXT: movl %eax, 8(%esp)
+; X86-NEXT: addl $12, %edi
+; X86-NEXT: movl %edi, 4(%esp)
+; X86-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll sincosf
+; X86-NEXT: flds 36(%esp)
+; X86-NEXT: flds 40(%esp)
+; X86-NEXT: flds 44(%esp)
+; X86-NEXT: flds 48(%esp)
+; X86-NEXT: fstps 12(%esi)
+; X86-NEXT: fstps 8(%esi)
+; X86-NEXT: fstps 4(%esi)
+; X86-NEXT: fstps (%esi)
+; X86-NEXT: addl $52, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; X64-LABEL: test_sincos_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $56, %rsp
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq %rdi, %r14
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; X64-NEXT: leaq 4(%rsp), %rdi
+; X64-NEXT: movq %rsp, %rsi
+; X64-NEXT: callq sincosf@PLT
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: leaq 12(%rsp), %rdi
+; X64-NEXT: leaq 8(%rsp), %rsi
+; X64-NEXT: callq sincosf@PLT
+; X64-NEXT: leaq 28(%rsp), %rdi
+; X64-NEXT: leaq 24(%rsp), %rsi
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: callq sincosf@PLT
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X64-NEXT: leaq 20(%rsp), %rdi
+; X64-NEXT: leaq 16(%rsp), %rsi
+; X64-NEXT: callq sincosf@PLT
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X64-NEXT: movups %xmm1, (%r14)
+; X64-NEXT: movups %xmm0, (%rbx)
+; X64-NEXT: addq $56, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r14
+; X64-NEXT: retq
+;
+; MACOS-SINCOS-STRET-LABEL: test_sincos_v4f32:
+; MACOS-SINCOS-STRET: ## %bb.0:
+; MACOS-SINCOS-STRET-NEXT: pushq %r14
+; MACOS-SINCOS-STRET-NEXT: pushq %rbx
+; MACOS-SINCOS-STRET-NEXT: subq $104, %rsp
+; MACOS-SINCOS-STRET-NEXT: movq %rsi, %rbx
+; MACOS-SINCOS-STRET-NEXT: movq %rdi, %r14
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; MACOS-SINCOS-STRET-NEXT: unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
+; MACOS-SINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, %xmm1
+; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; MACOS-SINCOS-STRET-NEXT: callq ___sincosf_stret
+; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; MACOS-SINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; MACOS-SINCOS-STRET-NEXT: unpcklpd (%rsp), %xmm2 ## 16-byte Folded Reload
+; MACOS-SINCOS-STRET-NEXT: ## xmm2 = xmm2[0],mem[0]
+; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
+; MACOS-SINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; MACOS-SINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; MACOS-SINCOS-STRET-NEXT: movups %xmm1, (%r14)
+; MACOS-SINCOS-STRET-NEXT: movups %xmm2, (%rbx)
+; MACOS-SINCOS-STRET-NEXT: addq $104, %rsp
+; MACOS-SINCOS-STRET-NEXT: popq %rbx
+; MACOS-SINCOS-STRET-NEXT: popq %r14
+; MACOS-SINCOS-STRET-NEXT: retq
+;
+; MACOS-NOSINCOS-STRET-LABEL: test_sincos_v4f32:
+; MACOS-NOSINCOS-STRET: ## %bb.0:
+; MACOS-NOSINCOS-STRET-NEXT: pushq %r14
+; MACOS-NOSINCOS-STRET-NEXT: pushq %rbx
+; MACOS-NOSINCOS-STRET-NEXT: subq $104, %rsp
+; MACOS-NOSINCOS-STRET-NEXT: movq %rsi, %rbx
+; MACOS-NOSINCOS-STRET-NEXT: movq %rdi, %r14
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: callq _cosf
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: callq _cosf
+; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: callq _cosf
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: callq _cosf
+; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; MACOS-NOSINCOS-STRET-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm1 = xmm1[0],mem[0]
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: callq _sinf
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: callq _sinf
+; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: callq _sinf
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: callq _sinf
+; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; MACOS-NOSINCOS-STRET-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
+; MACOS-NOSINCOS-STRET-NEXT: ## xmm1 = xmm1[0],mem[0]
+; MACOS-NOSINCOS-STRET-NEXT: movups %xmm1, (%r14)
+; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: movups %xmm0, (%rbx)
+; MACOS-NOSINCOS-STRET-NEXT: addq $104, %rsp
+; MACOS-NOSINCOS-STRET-NEXT: popq %rbx
+; MACOS-NOSINCOS-STRET-NEXT: popq %r14
+; MACOS-NOSINCOS-STRET-NEXT: retq
%result = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %x)
%result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0
%result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1
@@ -63,36 +217,120 @@ define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias
}
define void @test_sincos_v2f64(<2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) nounwind {
-; CHECK-LABEL: test_sincos_v2f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushl %edi
-; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: subl $52, %esp
-; CHECK-NEXT: movl 84(%esp), %esi
-; CHECK-NEXT: fldl 72(%esp)
-; CHECK-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
-; CHECK-NEXT: fldl 64(%esp)
-; CHECK-NEXT: movl 80(%esp), %edi
-; CHECK-NEXT: leal 24(%esp), %eax
-; CHECK-NEXT: movl %eax, 12(%esp)
-; CHECK-NEXT: movl %edi, 8(%esp)
-; CHECK-NEXT: fstpl (%esp)
-; CHECK-NEXT: calll sincos
-; CHECK-NEXT: leal 32(%esp), %eax
-; CHECK-NEXT: movl %eax, 12(%esp)
-; CHECK-NEXT: addl $8, %edi
-; CHECK-NEXT: movl %edi, 8(%esp)
-; CHECK-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
-; CHECK-NEXT: fstpl (%esp)
-; CHECK-NEXT: calll sincos
-; CHECK-NEXT: fldl 24(%esp)
-; CHECK-NEXT: fldl 32(%esp)
-; CHECK-NEXT: fstpl 8(%esi)
-; CHECK-NEXT: fstpl (%esi)
-; CHECK-NEXT: addl $52, %esp
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: popl %edi
-; CHECK-NEXT: retl
+; X86-LABEL: test_sincos_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $52, %esp
+; X86-NEXT: movl 84(%esp), %esi
+; X86-NEXT: fldl 72(%esp)
+; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
+; X86-NEXT: fldl 64(%esp)
+; X86-NEXT: movl 80(%esp), %edi
+; X86-NEXT: leal 24(%esp), %eax
+; X86-NEXT: movl %eax, 12(%esp)
+; X86-NEXT: movl %edi, 8(%esp)
+; X86-NEXT: fstpl (%esp)
+; X86-NEXT: calll sincos
+; X86-NEXT: leal 32(%esp), %eax
+; X86-NEXT: movl %eax, 12(%esp)
+; X86-NEXT: addl $8, %edi
+; X86-NEXT: movl %edi, 8(%esp)
+; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
+; X86-NEXT: fstpl (%esp)
+; X86-NEXT: calll sincos
+; X86-NEXT: fldl 24(%esp)
+; X86-NEXT: fldl 32(%esp)
+; X86-NEXT: fstpl 8(%esi)
+; X86-NEXT: fstpl (%esi)
+; X86-NEXT: addl $52, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; X64-LABEL: test_sincos_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $56, %rsp
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq %rdi, %r14
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: leaq 24(%rsp), %rdi
+; X64-NEXT: leaq 16(%rsp), %rsi
+; X64-NEXT: callq sincos@PLT
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: leaq 8(%rsp), %rdi
+; X64-NEXT: movq %rsp, %rsi
+; X64-NEXT: callq sincos@PLT
+; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
+; X64-NEXT: movups %xmm1, (%r14)
+; X64-NEXT: movups %xmm0, (%rbx)
+; X64-NEXT: addq $56, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r14
+; X64-NEXT: retq
+;
+; MACOS-SINCOS-STRET-LABEL: test_sincos_v2f64:
+; MACOS-SINCOS-STRET: ## %bb.0:
+; MACOS-SINCOS-STRET-NEXT: pushq %r14
+; MACOS-SINCOS-STRET-NEXT: pushq %rbx
+; MACOS-SINCOS-STRET-NEXT: subq $56, %rsp
+; MACOS-SINCOS-STRET-NEXT: movq %rsi, %rbx
+; MACOS-SINCOS-STRET-NEXT: movq %rdi, %r14
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: callq ___sincos_stret
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-SINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; MACOS-SINCOS-STRET-NEXT: callq ___sincos_stret
+; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; MACOS-SINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
+; MACOS-SINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; MACOS-SINCOS-STRET-NEXT: movups %xmm1, (%r14)
+; MACOS-SINCOS-STRET-NEXT: movups %xmm2, (%rbx)
+; MACOS-SINCOS-STRET-NEXT: addq $56, %rsp
+; MACOS-SINCOS-STRET-NEXT: popq %rbx
+; MACOS-SINCOS-STRET-NEXT: popq %r14
+; MACOS-SINCOS-STRET-NEXT: retq
+;
+; MACOS-NOSINCOS-STRET-LABEL: test_sincos_v2f64:
+; MACOS-NOSINCOS-STRET: ## %bb.0:
+; MACOS-NOSINCOS-STRET-NEXT: pushq %r14
+; MACOS-NOSINCOS-STRET-NEXT: pushq %rbx
+; MACOS-NOSINCOS-STRET-NEXT: subq $56, %rsp
+; MACOS-NOSINCOS-STRET-NEXT: movq %rsi, %rbx
+; MACOS-NOSINCOS-STRET-NEXT: movq %rdi, %r14
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: callq _cos
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: callq _cos
+; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: callq _sin
+; MACOS-NOSINCOS-STRET-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: callq _sin
+; MACOS-NOSINCOS-STRET-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; MACOS-NOSINCOS-STRET-NEXT: movups %xmm1, (%r14)
+; MACOS-NOSINCOS-STRET-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; MACOS-NOSINCOS-STRET-NEXT: movups %xmm0, (%rbx)
+; MACOS-NOSINCOS-STRET-NEXT: addq $56, %rsp
+; MACOS-NOSINCOS-STRET-NEXT: popq %rbx
+; MACOS-NOSINCOS-STRET-NEXT: popq %r14
+; MACOS-NOSINCOS-STRET-NEXT: retq
%result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %x)
%result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
%result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
diff --git a/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test b/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test
new file mode 100755
index 0000000..aa3f6dc
--- /dev/null
+++ b/llvm/test/DebugInfo/PDB/Native/pdb-native-index-overflow.test
@@ -0,0 +1,13 @@
+; Test that the native PDB reader isn't crashed by index value bigger than
+; number of types in TPI or IPI stream
+; RUN: llvm-pdbutil dump %p/../Inputs/empty.pdb --type-index=20000000\
+; RUN: | FileCheck -check-prefixes=TYPES,NOT_FOUND %s
+; RUN: llvm-pdbutil dump %p/../Inputs/empty.pdb --id-index=20000000\
+; RUN: | FileCheck -check-prefixes=IDS,NOT_FOUND %s
+
+TYPES: Types (TPI Stream)
+IDS: Types (IPI Stream)
+NOT_FOUND:============================================================
+NOT_FOUND: Showing 1 records.
+NOT_FOUND: Type 0x1312D00 doesn't exist in TPI stream
+
diff --git a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll
index 14ee00d..2763860 100644
--- a/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll
+++ b/llvm/test/Transforms/IndVarSimplify/loop-guard-order.ll
@@ -114,7 +114,7 @@ define i32 @urem_order1(i32 %n) {
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: call void @foo()
-; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 3
+; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 3
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT_LOOPEXIT]]:
@@ -205,13 +205,12 @@ define i64 @test_loop_with_div_order_1(i64 %n) {
; CHECK-NEXT: [[PARITY_CHECK:%.*]] = icmp eq i64 [[IS_ODD]], 0
; CHECK-NEXT: br i1 [[PARITY_CHECK]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT]]
; CHECK: [[LOOP_PREHEADER]]:
-; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[UPPER_BOUND]], i64 1)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[DUMMY:%.*]] = load volatile i64, ptr null, align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UMAX]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[UPPER_BOUND]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT_LOOPEXIT:.*]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index bfee39ea..068f82c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -365,8 +365,8 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = alloca i8, i64 0, align 16
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
index ea01489..0a9494e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll
@@ -10,8 +10,8 @@ define void @licm_replicate_call(double %x, ptr %dst) {
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00)
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
index 157b787..3558957 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
@@ -64,9 +64,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]]
-; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP5]], i32 1
+; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]]
+; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
+; TFCOMMON-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
; TFCOMMON-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
@@ -79,7 +79,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TFCOMMON-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]]
; TFCOMMON: pred.store.if1:
-; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
+; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0
; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8
; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]]
; TFCOMMON: pred.store.continue2:
@@ -105,9 +105,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ]
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ]
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]]
-; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
-; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1
+; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]]
+; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
+; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
@@ -120,7 +120,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; TFA_INTERLEAVE: pred.store.if3:
-; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
+; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]]
; TFA_INTERLEAVE: pred.store.continue4:
@@ -134,7 +134,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1
; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
; TFA_INTERLEAVE: pred.store.if7:
-; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1
+; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0
; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8
; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]]
; TFA_INTERLEAVE: pred.store.continue8:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
index 03087bb..4590dfc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
@@ -199,10 +199,8 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) {
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> [[TMP8]], i1 [[TMP10]], i32 1
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP9]], i1 [[TMP10]], i32 2
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 3
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
new file mode 100644
index 0000000..8615401
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
+
+define void @hoist_invariant_load_noalias_due_to_memchecks(ptr %dst, ptr %invariant_ptr, i32 %n) {
+; CHECK-LABEL: define void @hoist_invariant_load_noalias_due_to_memchecks(
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[INVARIANT_PTR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 4
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INV_VAL:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: store i32 [[INV_VAL]], ptr [[GEP]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %inv_val = load i32, ptr %invariant_ptr, align 4
+ %gep = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 %inv_val, ptr %gep, align 4
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Test that loads with non-invariant addresses are not hoisted.
+define void @dont_hoist_variant_address(ptr %dst, ptr %src, i32 %n) {
+; CHECK-LABEL: define void @dont_hoist_variant_address(
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64
+; CHECK-NEXT: [[A1:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[SRC2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
+; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
+; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
+ %val = load i32, ptr %gep.src, align 4
+ %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 %val, ptr %gep.dst, align 4
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Test that predicated loads are not hoisted.
+define void @dont_hoist_predicated_load(ptr %dst, ptr %invariant_ptr, ptr %cond_ptr, i32 %n) {
+; CHECK-LABEL: define void @dont_hoist_predicated_load(
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[INVARIANT_PTR:%.*]], ptr [[COND_PTR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[TMP20]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP22]], 4
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND_PTR]], i64 [[TMP3]]
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 4
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND_PTR]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND_PTR]], i32 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META11:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP9]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
+; CHECK: [[PRED_STORE_IF6]]:
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP8]]
+; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4, !alias.scope [[META16]], !noalias [[META18]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
+; CHECK: [[PRED_STORE_CONTINUE7]]:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
+; CHECK: [[PRED_STORE_IF8]]:
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP12]]
+; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP17]], align 4, !alias.scope [[META16]], !noalias [[META18]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
+; CHECK: [[PRED_STORE_CONTINUE9]]:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]]
+; CHECK: [[PRED_STORE_IF10]]:
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14]]
+; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 3
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]]
+; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP21]], align 4, !alias.scope [[META16]], !noalias [[META18]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]]
+; CHECK: [[PRED_STORE_CONTINUE11]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_COND:%.*]] = getelementptr inbounds i32, ptr [[COND_PTR]], i32 [[IV]]
+; CHECK-NEXT: [[COND:%.*]] = load i32, ptr [[GEP_COND]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[COND]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[INV_VAL:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: store i32 [[INV_VAL]], ptr [[GEP]], align 4
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.cond = getelementptr inbounds i32, ptr %cond_ptr, i32 %iv
+ %cond = load i32, ptr %gep.cond, align 4
+ %cmp = icmp sgt i32 %cond, 0
+ br i1 %cmp, label %if.then, label %loop.latch
+
+if.then:
+ %inv_val = load i32, ptr %invariant_ptr, align 4
+ %gep = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 %inv_val, ptr %gep, align 4
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll
new file mode 100644
index 0000000..857b913
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-metadata.ll
@@ -0,0 +1,100 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s
+
+define void @test_widen_metadata(ptr noalias %A, ptr noalias %B, i32 %n) {
+; CHECK-LABEL: Checking a loop in 'test_widen_metadata'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK: <x1> vector loop: {
+; CHECK: vector.body:
+; CHECK: WIDEN ir<%lv> = load vp<{{.*}}>
+; CHECK: WIDEN-CAST ir<%conv> = sitofp ir<%lv> to float
+; CHECK: WIDEN ir<%mul> = fmul ir<%conv>, ir<2.000000e+00>
+; CHECK: WIDEN-CAST ir<%conv.back> = fptosi ir<%mul> to i32
+; CHECK: WIDEN store vp<{{.*}}>, ir<%conv.back>
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %gep.A = getelementptr inbounds i32, ptr %A, i32 %i
+ %lv = load i32, ptr %gep.A, align 4, !tbaa !0, !range !6
+ %conv = sitofp i32 %lv to float, !fpmath !5
+ %mul = fmul float %conv, 2.0, !fpmath !5
+ %conv.back = fptosi float %mul to i32
+ %gep.B = getelementptr inbounds i32, ptr %B, i32 %i
+ store i32 %conv.back, ptr %gep.B, align 4, !tbaa !0
+ %i.next = add i32 %i, 1
+ %cond = icmp eq i32 %i.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+declare float @llvm.sqrt.f32(float)
+
+define void @test_intrinsic_with_metadata(ptr noalias %A, ptr noalias %B, i32 %n) {
+; CHECK-LABEL: Checking a loop in 'test_intrinsic_with_metadata'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK: <x1> vector loop: {
+; CHECK: vector.body:
+; CHECK: WIDEN ir<%lv> = load vp<{{.*}}>
+; CHECK: WIDEN-INTRINSIC ir<%sqrt> = call llvm.sqrt(ir<%lv>)
+; CHECK: WIDEN store vp<{{.*}}>, ir<%sqrt>
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %gep.A = getelementptr inbounds float, ptr %A, i32 %i
+ %lv = load float, ptr %gep.A, align 4, !tbaa !0
+ %sqrt = call float @llvm.sqrt.f32(float %lv), !fpmath !5
+ %gep.B = getelementptr inbounds float, ptr %B, i32 %i
+ store float %sqrt, ptr %gep.B, align 4, !tbaa !0
+ %i.next = add i32 %i, 1
+ %cond = icmp eq i32 %i.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_widen_with_multiple_metadata(ptr noalias %A, ptr noalias %B, i32 %n) {
+; CHECK-LABEL: Checking a loop in 'test_widen_with_multiple_metadata'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK: <x1> vector loop: {
+; CHECK: vector.body:
+; CHECK: WIDEN ir<%lv> = load vp<{{.*}}>
+; CHECK: WIDEN-CAST ir<%conv> = sitofp ir<%lv> to float
+; CHECK: WIDEN ir<%mul> = fmul ir<%conv>, ir<2.000000e+00>
+; CHECK: WIDEN-CAST ir<%conv.back> = fptosi ir<%mul> to i32
+; CHECK: WIDEN store vp<{{.*}}>, ir<%conv.back>
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %gep.A = getelementptr inbounds i32, ptr %A, i32 %i
+ %lv = load i32, ptr %gep.A, align 4, !tbaa !0, !range !6
+ %conv = sitofp i32 %lv to float
+ %mul = fmul float %conv, 2.0
+ %conv.back = fptosi float %mul to i32
+ %gep.B = getelementptr inbounds i32, ptr %B, i32 %i
+ store i32 %conv.back, ptr %gep.B, align 4, !tbaa !0
+ %i.next = add i32 %i, 1
+ %cond = icmp eq i32 %i.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2}
+!2 = !{!"root"}
+!5 = !{float 2.500000e+00}
+!6 = !{i32 0, i32 100}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll
new file mode 100644
index 0000000..a35bcf1
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -passes='default<O3>' -S %s | FileCheck %s
+
+target triple = "arm64-apple-macosx"
+
+%"class.dealii::VectorizedArray" = type { [4 x double] }
+
+define void @hoist_invariant_load(ptr %invariant_ptr, i64 %num_elements, ptr %array) {
+; CHECK-LABEL: define void @hoist_invariant_load(
+; CHECK-SAME: ptr readonly captures(none) [[INVARIANT_PTR:%.*]], i64 [[NUM_ELEMENTS:%.*]], ptr captures(none) [[ARRAY:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUM_ELEMENTS]], 0
+; CHECK-NEXT: br i1 [[CMP1_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[I2:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nusw %"class.dealii::VectorizedArray", ptr [[ARRAY]], i64 [[I2]]
+; CHECK-NEXT: [[INVARIANT_VAL:%.*]] = load double, ptr [[INVARIANT_PTR]], align 8
+; CHECK-NEXT: [[ARRAY_VAL:%.*]] = load double, ptr [[GEP]], align 8
+; CHECK-NEXT: [[SUM:%.*]] = fadd double [[INVARIANT_VAL]], [[ARRAY_VAL]]
+; CHECK-NEXT: store double [[SUM]], ptr [[GEP]], align 8
+; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I2]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[NUM_ELEMENTS]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ]
+ %cmp = icmp ult i64 %i, %num_elements
+ br i1 %cmp, label %loop.latch, label %exit
+
+loop.latch: ; preds = %loop.header
+ %gep = getelementptr nusw %"class.dealii::VectorizedArray", ptr %array, i64 %i
+ %invariant_val = load double, ptr %invariant_ptr, align 8
+ %array_val = load double, ptr %gep, align 8
+ %sum = fadd double %array_val, %invariant_val
+ store double %sum, ptr %gep, align 8
+ %i.next = add i64 %i, 1
+ br label %loop.header
+
+exit: ; preds = %loop.header
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
index d16843c..6629b12 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div-like-mixed-with-undefs.ll
@@ -1,21 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
-define ptr @test(ptr %d) {
+define ptr @test(ptr %d, i64 %v) {
; CHECK-LABEL: define ptr @test(
-; CHECK-SAME: ptr [[D:%.*]]) {
+; CHECK-SAME: ptr [[D:%.*]], i64 [[V:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[D]], align 1
; CHECK-NEXT: [[CMP4_2:%.*]] = icmp eq i8 [[TMP0]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, 0
-; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, 0
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[CMP4_2]], i64 0, i64 4
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 0, [[V]]
+; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 1, [[V]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i64> poison, i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i64> [[TMP5]], i64 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x i64> [[TMP6]], i64 [[TMP4]], i32 4
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <6 x i64> [[TMP7]], <6 x i64> poison, <6 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4>
-; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 1, i64 1, i64 1, i64 0>
+; CHECK-NEXT: [[TMP9:%.*]] = mul <6 x i64> [[TMP8]], <i64 2, i64 6, i64 4, i64 3, i64 5, i64 4>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <6 x i64> [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <6 x i64> [[TMP9]], i32 1
@@ -31,23 +31,23 @@ define ptr @test(ptr %d) {
; CHECK-NEXT: ret ptr [[TMP20]]
;
entry:
- %0 = load i8, ptr null, align 1
+ %0 = load i8, ptr %d, align 1
%cmp4.2 = icmp eq i8 %0, 0
- %1 = select i1 %cmp4.2, i64 0, i64 0
+ %1 = select i1 %cmp4.2, i64 0, i64 4
%2 = shl i64 %1, 1
%3 = getelementptr i8, ptr %d, i64 %2
- %4 = xor i64 0, 0
- %5 = udiv i64 %4, 0
+ %4 = xor i64 0, %v
+ %5 = udiv i64 %4, 3
%6 = mul i64 %5, 6
%7 = getelementptr i8, ptr %d, i64 %6
- %8 = shl i64 %1, 0
+ %8 = shl i64 %1, 2
%scevgep42 = getelementptr i8, ptr %d, i64 %8
- %9 = mul i64 %5, 1
+ %9 = mul i64 %5, 3
%10 = getelementptr i8, ptr %d, i64 %9
- %11 = udiv i64 1, 0
- %12 = mul i64 %11, 1
+ %11 = udiv i64 1, %v
+ %12 = mul i64 %11, 5
%13 = getelementptr i8, ptr %d, i64 %12
- %14 = mul i64 %11, 0
+ %14 = mul i64 %11, 4
%15 = getelementptr i8, ptr %d, i64 %14
ret ptr %15
}
diff --git a/llvm/test/tools/llvm-config/paths.test b/llvm/test/tools/llvm-config/paths.test
index 419f155..61d86f7 100644
--- a/llvm/test/tools/llvm-config/paths.test
+++ b/llvm/test/tools/llvm-config/paths.test
@@ -4,18 +4,34 @@ RUN: llvm-config --bindir 2>&1 | FileCheck --check-prefix=CHECK-BINDIR %s
CHECK-BINDIR: {{.*}}{{/|\\}}bin
CHECK-BINDIR-NOT: error:
CHECK-BINDIR-NOT: warning
+RUN: llvm-config --bindir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-BINDIR2 %s
+CHECK-BINDIR2: {{.*}}{{/|\\\\}}bin
+CHECK-BINDIR2-NOT: error:
+CHECK-BINDIR2-NOT: warning
RUN: llvm-config --includedir 2>&1 | FileCheck --check-prefix=CHECK-INCLUDEDIR %s
CHECK-INCLUDEDIR: {{.*}}{{/|\\}}include
CHECK-INCLUDEDIR-NOT: error:
CHECK-INCLUDEDIR-NOT: warning
+RUN: llvm-config --includedir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-INCLUDEDIR2 %s
+CHECK-INCLUDEDIR2: {{.*}}{{/|\\\\}}include
+CHECK-INCLUDEDIR2-NOT: error:
+CHECK-INCLUDEDIR2-NOT: warning
RUN: llvm-config --libdir 2>&1 | FileCheck --check-prefix=CHECK-LIBDIR %s
CHECK-LIBDIR: {{.*}}{{/|\\}}lib{{.*}}
CHECK-LIBDIR-NOT: error:
CHECK-LIBDIR-NOT: warning
+RUN: llvm-config --libdir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-LIBDIR2 %s
+CHECK-LIBDIR2: {{.*}}{{/|\\\\}}lib{{.*}}
+CHECK-LIBDIR2-NOT: error:
+CHECK-LIBDIR2-NOT: warning
RUN: llvm-config --cmakedir 2>&1 | FileCheck --check-prefix=CHECK-CMAKEDIR %s
CHECK-CMAKEDIR: {{.*}}{{/|\\}}cmake{{/|\\}}llvm
CHECK-CMAKEDIR-NOT: error:
CHECK-CMAKEDIR-NOT: warning
+RUN: llvm-config --cmakedir --quote-paths 2>&1 | FileCheck --check-prefix=CHECK-CMAKEDIR2 %s
+CHECK-CMAKEDIR2: {{.*}}{{/|\\\\}}cmake{{/|\\\\}}llvm
+CHECK-CMAKEDIR2-NOT: error:
+CHECK-CMAKEDIR2-NOT: warning