author     Sanjay Patel <spatel@rotateright.com>  2019-08-14 20:21:30 +0000
committer  Sanjay Patel <spatel@rotateright.com>  2019-08-14 20:21:30 +0000
commit     a8ba919c01df9e1d9792c8de2347411f3439b45a (patch)
tree       a1db8ac0e0800d34945d9a9324bf1ed83243fb8a
parent     6169a730886b64c7b7aa069a019fd1dc032b61d8 (diff)
[x86] add tests for fadd reduction; NFC
More coverage for D66236.

llvm-svn: 368913
-rw-r--r--  llvm/test/CodeGen/X86/haddsub.ll | 132
1 file changed, 132 insertions(+), 0 deletions(-)
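Note: the new tests reuse the file's existing FileCheck prefixes (SSE3-SLOW, SSE3-FAST, AVX-SLOW, AVX-FAST). As a sketch of how those four modes are driven (assuming haddsub.ll's usual RUN-line setup, where the fast-hops CPU attribute selects the *-FAST output), the RUN lines look like:

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3           | FileCheck %s --check-prefixes=SSE3,SSE3-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE3,SSE3-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx            | FileCheck %s --check-prefixes=AVX,AVX-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,fast-hops  | FileCheck %s --check-prefixes=AVX,AVX-FAST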
diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/CodeGen/X86/haddsub.ll
index a349065..78270a5 100644
--- a/llvm/test/CodeGen/X86/haddsub.ll
+++ b/llvm/test/CodeGen/X86/haddsub.ll
@@ -1985,3 +1985,135 @@ define float @hadd32_16_optsize(<16 x float> %x225) optsize {
%x230 = extractelement <16 x float> %x229, i32 0
ret float %x230
}
+
+define float @partial_reduction_fadd_v8f32(<8 x float> %x) {
+; SSE3-SLOW-LABEL: partial_reduction_fadd_v8f32:
+; SSE3-SLOW: # %bb.0:
+; SSE3-SLOW-NEXT: movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT: addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT: addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT: movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT: retq
+;
+; SSE3-FAST-LABEL: partial_reduction_fadd_v8f32:
+; SSE3-FAST: # %bb.0:
+; SSE3-FAST-NEXT: movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT: addps %xmm0, %xmm1
+; SSE3-FAST-NEXT: haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT: movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT: retq
+;
+; AVX-SLOW-LABEL: partial_reduction_fadd_v8f32:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT: vzeroupper
+; AVX-SLOW-NEXT: retq
+;
+; AVX-FAST-LABEL: partial_reduction_fadd_v8f32:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vzeroupper
+; AVX-FAST-NEXT: retq
+ %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x0213 = fadd <8 x float> %x, %x23
+ %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x0123 = fadd nsz reassoc <8 x float> %x0213, %x13
+ %r = extractelement <8 x float> %x0123, i32 0
+ ret float %r
+}
+
+define float @partial_reduction_fadd_v8f32_wrong_flags(<8 x float> %x) {
+; SSE3-SLOW-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; SSE3-SLOW: # %bb.0:
+; SSE3-SLOW-NEXT: movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT: addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT: addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT: movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT: retq
+;
+; SSE3-FAST-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; SSE3-FAST: # %bb.0:
+; SSE3-FAST-NEXT: movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT: addps %xmm0, %xmm1
+; SSE3-FAST-NEXT: haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT: movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT: retq
+;
+; AVX-SLOW-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT: vzeroupper
+; AVX-SLOW-NEXT: retq
+;
+; AVX-FAST-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vzeroupper
+; AVX-FAST-NEXT: retq
+ %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x0213 = fadd fast <8 x float> %x, %x23
+ %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x0123 = fadd ninf nnan <8 x float> %x0213, %x13
+ %r = extractelement <8 x float> %x0123, i32 0
+ ret float %r
+}
+
+define float @partial_reduction_fadd_v16f32(<16 x float> %x) {
+; SSE3-SLOW-LABEL: partial_reduction_fadd_v16f32:
+; SSE3-SLOW: # %bb.0:
+; SSE3-SLOW-NEXT: movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT: addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT: addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT: movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT: retq
+;
+; SSE3-FAST-LABEL: partial_reduction_fadd_v16f32:
+; SSE3-FAST: # %bb.0:
+; SSE3-FAST-NEXT: movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT: addps %xmm0, %xmm1
+; SSE3-FAST-NEXT: haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT: movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT: retq
+;
+; AVX-SLOW-LABEL: partial_reduction_fadd_v16f32:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT: vzeroupper
+; AVX-SLOW-NEXT: retq
+;
+; AVX-FAST-LABEL: partial_reduction_fadd_v16f32:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vzeroupper
+; AVX-FAST-NEXT: retq
+ %x23 = shufflevector <16 x float> %x, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x0213 = fadd <16 x float> %x, %x23
+ %x13 = shufflevector <16 x float> %x0213, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x0123 = fadd reassoc nsz <16 x float> %x0213, %x13
+ %r = extractelement <16 x float> %x0123, i32 0
+ ret float %r
+}
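All three new tests currently compile to identical code: they record the baseline ahead of D66236, which presumably folds the correctly-flagged partial reductions (reassoc and nsz on the final fadd) while leaving the _wrong_flags variant alone, since its two fadds carry mismatched fast-math flags. Assuming the usual workflow for auto-generated CHECK lines, the assertions can be regenerated with:

$ llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/haddsub.ll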