diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- | llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fpclamptosat.ll | 45 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fpclamptosat_vec.ll | 105 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec-strict-cmp-128.ll | 32 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshl-sub128.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-fshr-sub128.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-sext.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-zext.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll | 8 |
14 files changed, 173 insertions, 95 deletions
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll index dec829f..44cf4e8 100644 --- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll +++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll @@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 16(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: paddb (%rdx), %xmm0 ; SSE2-NEXT: movdqa %xmm0, (%rcx) @@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 32(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3] @@ -4155,7 +4155,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 48(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE2-NEXT: paddb (%rdx), %xmm2 diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll index 3d4cddb..89b5c33 100644 --- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll +++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll @@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: paddb (%rsi), %xmm1 ; SSE2-NEXT: movdqa %xmm1, (%rdx) @@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] @@ -3335,7 +3335,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa (%rdi), %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; SSE2-NEXT: paddb (%rsi), %xmm2 diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll index 3f5ec7b..67483be 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat.ll @@ -161,8 +161,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32(half %x) nounwind { -; CHECK-LABEL: utesth_f16i32: +define i32 @utest_f16i32(half %x) nounwind { +; CHECK-LABEL: utest_f16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -360,8 +360,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16(half %x) nounwind { -; CHECK-LABEL: utesth_f16i16: +define i16 @utest_f16i16(half %x) nounwind { +; CHECK-LABEL: utest_f16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -566,8 +566,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64(half %x) nounwind { -; CHECK-LABEL: utesth_f16i64: +define i64 @utest_f16i64(half %x) nounwind { +; CHECK-LABEL: utest_f16i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __fixunshfti@PLT @@ -762,8 +762,8 @@ entry: ret i32 %conv6 } -define i32 @utesth_f16i32_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i32_mm: +define i32 @utest_f16i32_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i32_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -946,8 +946,8 @@ entry: ret i16 %conv6 } -define i16 @utesth_f16i16_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i16_mm: +define i16 @utest_f16i16_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i16_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __extendhfsf2@PLT @@ -1131,8 +1131,8 @@ entry: ret i64 %conv6 } -define i64 @utesth_f16i64_mm(half %x) nounwind { -; CHECK-LABEL: utesth_f16i64_mm: +define i64 @utest_f16i64_mm(half %x) nounwind { +; CHECK-LABEL: utest_f16i64_mm: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq __fixunshfti@PLT @@ -1170,6 +1170,27 @@ entry: ret i64 %conv6 } +; i32 non saturate + +define i32 @ustest_f16i32_nsat(half %x) nounwind { +; CHECK-LABEL: ustest_f16i32_nsat: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: cvttss2si %xmm0, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: sarl $31, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: cmovlel %edx, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + %conv = fptosi half %x to i32 + %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv) + %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + ret i32 %spec.store.select7 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll index 1a2cfd6..991ce33 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -747,8 +747,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i32: +define <4 x i32> @utest_f16i32(<4 x half> %x) nounwind { +; SSE-LABEL: utest_f16i32: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movaps %xmm0, %xmm1 @@ -835,7 +835,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i32: +; AVX2-LABEL: utest_f16i32: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 @@ -893,7 +893,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i32: +; AVX512-LABEL: utest_f16i32: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 @@ -1338,8 +1338,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i16: +define <8 x i16> @utest_f16i16(<8 x half> %x) nounwind { +; SSE-LABEL: utest_f16i16: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill @@ -1436,7 +1436,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i16: +; AVX2-LABEL: utest_f16i16: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] @@ -1453,7 +1453,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i16: +; AVX512-LABEL: utest_f16i16: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0 @@ -2456,8 +2456,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i64: +define <2 x i64> @utest_f16i64(<2 x half> %x) nounwind { +; SSE-LABEL: utest_f16i64: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %r14 ; SSE-NEXT: pushq %rbx @@ -2483,7 +2483,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i64: +; AVX2-LABEL: utest_f16i64: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %r14 ; AVX2-NEXT: pushq %rbx @@ -2508,7 +2508,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind { ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i64: +; AVX512-LABEL: utest_f16i64: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx @@ -3359,8 +3359,8 @@ entry: ret <4 x i32> %conv6 } -define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i32_mm: +define <4 x i32> @utest_f16i32_mm(<4 x half> %x) nounwind { +; SSE-LABEL: utest_f16i32_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movaps %xmm0, %xmm1 @@ -3447,7 +3447,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i32_mm: +; AVX2-LABEL: utest_f16i32_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 @@ -3505,7 +3505,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i32_mm: +; AVX512-LABEL: utest_f16i32_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0 @@ -3935,8 +3935,8 @@ entry: ret <8 x i16> %conv6 } -define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i16_mm: +define <8 x i16> @utest_f16i16_mm(<8 x half> %x) nounwind { +; SSE-LABEL: utest_f16i16_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: subq $72, %rsp ; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill @@ -4033,7 +4033,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { ; SSE-NEXT: addq $72, %rsp ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i16_mm: +; AVX2-LABEL: utest_f16i16_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] @@ -4050,7 +4050,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i16_mm: +; AVX512-LABEL: utest_f16i16_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 ; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0 @@ -4820,8 +4820,8 @@ entry: ret <2 x i64> %conv6 } -define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { -; SSE-LABEL: utesth_f16i64_mm: +define <2 x i64> @utest_f16i64_mm(<2 x half> %x) nounwind { +; SSE-LABEL: utest_f16i64_mm: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %r14 ; SSE-NEXT: pushq %rbx @@ -4847,7 +4847,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq ; -; AVX2-LABEL: utesth_f16i64_mm: +; AVX2-LABEL: utest_f16i64_mm: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %r14 ; AVX2-NEXT: pushq %rbx @@ -4872,7 +4872,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind { ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: retq ; -; AVX512-LABEL: utesth_f16i64_mm: +; AVX512-LABEL: utest_f16i64_mm: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx @@ -4974,6 +4974,63 @@ entry: ret <2 x i64> %conv6 } +; i32 non saturate + +define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) nounwind { +; SSE-LABEL: ustest_f16i32_nsat: +; SSE: # %bb.0: # %entry +; SSE-NEXT: subq $72, %rsp +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; SSE-NEXT: psrlq $48, %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: cvttps2dq %xmm1, %xmm0 +; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE-NEXT: pand %xmm0, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: addq $72, %rsp +; SSE-NEXT: retq +; +; AVX-LABEL: ustest_f16i32_nsat: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %conv = fptosi <4 x half> %x to <4 x i32> + %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv) + %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer) + ret <4 x i32> %spec.store.select7 +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll index ecd9435..1766b4d 100644 --- a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll +++ b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll @@ -58,7 +58,7 @@ define <8 x float> @foo8(<8 x float> %v, ptr%p) nounwind { define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind { ; AVX2-LABEL: undef_splatmask: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2] ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> @@ -68,7 +68,7 @@ define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind { define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind { ; AVX2-LABEL: undef_splatmask2: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2] ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 undef> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> @@ -78,7 +78,7 @@ define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind { define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind { ; AVX2-LABEL: undef_splatmask3: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2] ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 3> @@ -88,7 +88,7 @@ define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind { define <4 x i32> @undef_splatmask4(<4 x i32> %v, ptr %p) nounwind { ; AVX2-LABEL: undef_splatmask4: ; AVX2: # %bb.0: -; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,3,3] +; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,2,2] ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3] ; AVX2-NEXT: vmovaps %xmm0, (%rdi) ; AVX2-NEXT: vmovaps %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll index 209d6a5..93a692c 100644 --- a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll @@ -1911,13 +1911,13 @@ define <2 x i64> @test_v2f64_ogt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmoval %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmoval %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2031,13 +2031,13 @@ define <2 x i64> @test_v2f64_oge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovael %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmovael %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2151,13 +2151,13 @@ define <2 x i64> @test_v2f64_olt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmoval %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmoval %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2269,13 +2269,13 @@ define <2 x i64> @test_v2f64_ole_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovael %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmovael %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2680,13 +2680,13 @@ define <2 x i64> @test_v2f64_ugt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbl %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmovbl %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2798,13 +2798,13 @@ define <2 x i64> @test_v2f64_uge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbel %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: ucomisd %xmm2, %xmm4 ; SSE-32-NEXT: cmovbel %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -2916,13 +2916,13 @@ define <2 x i64> @test_v2f64_ult_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbl %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmovbl %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 @@ -3036,13 +3036,13 @@ define <2 x i64> @test_v2f64_ule_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1, ; SSE-32-NEXT: movl $0, %edx ; SSE-32-NEXT: cmovbel %ecx, %edx ; SSE-32-NEXT: movd %edx, %xmm3 -; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] ; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-32-NEXT: ucomisd %xmm4, %xmm2 ; SSE-32-NEXT: cmovbel %ecx, %eax ; SSE-32-NEXT: movd %eax, %xmm2 -; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; SSE-32-NEXT: pand %xmm3, %xmm0 ; SSE-32-NEXT: pandn %xmm1, %xmm3 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll index 9ecc629..b378dce 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll @@ -162,7 +162,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pslld $23, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -182,7 +182,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: pslld $23, %xmm1 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -200,7 +200,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 @@ -277,7 +277,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; @@ -289,7 +289,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X86-SSE2-NEXT: pslld $23, %xmm1 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll index 322ebe2..06ff7e7 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll @@ -250,7 +250,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; SSE2-NEXT: movdqa %xmm3, %xmm5 ; SSE2-NEXT: pandn %xmm4, %xmm5 @@ -286,7 +286,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31] ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: pandn %xmm3, %xmm4 @@ -316,7 +316,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -423,7 +423,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0 @@ -450,7 +450,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 ; X86-SSE2-NEXT: pandn %xmm4, %xmm5 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll index 178c02f..ef5ffe4 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll @@ -172,7 +172,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: psubd %xmm1, %xmm2 ; SSE2-NEXT: pslld $23, %xmm2 @@ -194,7 +194,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: psubd %xmm1, %xmm2 ; SSE41-NEXT: pslld $23, %xmm2 @@ -214,7 +214,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 @@ -293,7 +293,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 @@ -309,7 +309,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] ; X86-SSE2-NEXT: pxor %xmm2, %xmm2 ; X86-SSE2-NEXT: psubd %xmm1, %xmm2 ; X86-SSE2-NEXT: pslld $23, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll index 372deb05..2d8670a 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll @@ -251,7 +251,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v2i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; SSE2-NEXT: movdqa %xmm3, %xmm5 ; SSE2-NEXT: pand %xmm4, %xmm5 @@ -287,7 +287,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; SSE41-LABEL: splatvar_funnnel_v2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31] ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: pand %xmm3, %xmm4 @@ -317,7 +317,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; AVX1-LABEL: splatvar_funnnel_v2i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -425,7 +425,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; XOPAVX1-LABEL: splatvar_funnnel_v2i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31] ; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 @@ -452,7 +452,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> % ; ; X86-SSE2-LABEL: splatvar_funnnel_v2i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0] ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31] ; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 ; X86-SSE2-NEXT: pand %xmm4, %xmm5 diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index f57efb4..1e11ea9 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -1409,11 +1409,11 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) { ; X86-SSE2-NEXT: movzbl %al, %eax ; X86-SSE2-NEXT: negl %eax ; X86-SSE2-NEXT: movd %eax, %xmm0 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] ; X86-SSE2-NEXT: andl $1, %ecx ; X86-SSE2-NEXT: negl %ecx ; X86-SSE2-NEXT: movd %ecx, %xmm0 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X86-SSE2-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index bd1a48b..7b0f1c9 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -2555,7 +2555,7 @@ entry: define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { ; SSE2-LABEL: splatshuf_zext_v4i64: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movdqa %xmm0, %xmm1 @@ -2563,7 +2563,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { ; ; SSSE3-LABEL: splatshuf_zext_v4i64: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSSE3-NEXT: pxor %xmm1, %xmm1 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: movdqa %xmm0, %xmm1 @@ -2571,7 +2571,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { ; ; SSE41-LABEL: splatshuf_zext_v4i64: ; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: retq diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll index 19a31a6..31ed745 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll @@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 16(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: paddb (%rdx), %xmm0 ; SSE2-NEXT: movdqa %xmm0, (%rcx) @@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 32(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3] @@ -4610,7 +4610,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: paddb 48(%rsi), %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: pxor %xmm1, %xmm1 @@ -6544,7 +6544,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. ; SSE2-NEXT: movdqa (%rdi), %xmm0 ; SSE2-NEXT: paddb (%rsi), %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movdqa 16(%rdx), %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll index 239472c..5b4cdd2 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll @@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in. ; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: paddb (%rsi), %xmm1 ; SSE2-NEXT: movdqa %xmm1, (%rdx) @@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in. ; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] @@ -3660,7 +3660,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in. ; SSE2-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE2-NEXT: pxor %xmm0, %xmm0 @@ -5250,7 +5250,7 @@ define void @vec512_i16_widen_to_i256_factor16_broadcast_to_v2i256_factor2(ptr % define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.elt.ptr, ptr %out.vec.bias.ptr, ptr %out.vec.ptr) nounwind { ; SSE2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,1,1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0] ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movdqa 16(%rsi), %xmm1 |