aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll6
-rw-r--r--llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll6
-rw-r--r--llvm/test/CodeGen/X86/fpclamptosat.ll45
-rw-r--r--llvm/test/CodeGen/X86/fpclamptosat_vec.ll105
-rw-r--r--llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll8
-rw-r--r--llvm/test/CodeGen/X86/vec-strict-cmp-128.ll32
-rw-r--r--llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll10
-rw-r--r--llvm/test/CodeGen/X86/vector-fshl-sub128.ll10
-rw-r--r--llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll10
-rw-r--r--llvm/test/CodeGen/X86/vector-fshr-sub128.ll10
-rw-r--r--llvm/test/CodeGen/X86/vector-sext.ll4
-rw-r--r--llvm/test/CodeGen/X86/vector-zext.ll6
-rw-r--r--llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll8
-rw-r--r--llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll8
14 files changed, 173 insertions, 95 deletions
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
index dec829f..44cf4e8 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
@@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 16(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: paddb (%rdx), %xmm0
; SSE2-NEXT: movdqa %xmm0, (%rcx)
@@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 32(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
@@ -4155,7 +4155,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 48(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: paddb (%rdx), %xmm2
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index 3d4cddb..89b5c33 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: paddb (%rsi), %xmm1
; SSE2-NEXT: movdqa %xmm1, (%rdx)
@@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
@@ -3335,7 +3335,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: paddb (%rsi), %xmm2
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index 3f5ec7b..67483be 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -161,8 +161,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) nounwind {
+; CHECK-LABEL: utest_f16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -360,8 +360,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) nounwind {
+; CHECK-LABEL: utest_f16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -566,8 +566,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i64:
+define i64 @utest_f16i64(half %x) nounwind {
+; CHECK-LABEL: utest_f16i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __fixunshfti@PLT
@@ -762,8 +762,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i32_mm:
+define i32 @utest_f16i32_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i32_mm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -946,8 +946,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i16_mm:
+define i16 @utest_f16i16_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i16_mm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -1131,8 +1131,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i64_mm:
+define i64 @utest_f16i64_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i64_mm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __fixunshfti@PLT
@@ -1170,6 +1170,27 @@ entry:
ret i64 %conv6
}
+; i32 non saturate
+
+define i32 @ustest_f16i32_nsat(half %x) nounwind {
+; CHECK-LABEL: ustest_f16i32_nsat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %ecx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: sarl $31, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: andl %ecx, %eax
+; CHECK-NEXT: cmovlel %edx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
index 1a2cfd6..991ce33 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -747,8 +747,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movaps %xmm0, %xmm1
@@ -835,7 +835,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i32:
+; AVX2-LABEL: utest_f16i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
@@ -893,7 +893,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i32:
+; AVX512-LABEL: utest_f16i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
@@ -1338,8 +1338,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
@@ -1436,7 +1436,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i16:
+; AVX2-LABEL: utest_f16i16:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
@@ -1453,7 +1453,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i16:
+; AVX512-LABEL: utest_f16i16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0
@@ -2456,8 +2456,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
@@ -2483,7 +2483,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i64:
+; AVX2-LABEL: utest_f16i64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %rbx
@@ -2508,7 +2508,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
; AVX2-NEXT: popq %r14
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i64:
+; AVX512-LABEL: utest_f16i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
@@ -3359,8 +3359,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i32_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movaps %xmm0, %xmm1
@@ -3447,7 +3447,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i32_mm:
+; AVX2-LABEL: utest_f16i32_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
@@ -3505,7 +3505,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i32_mm:
+; AVX512-LABEL: utest_f16i32_mm:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
@@ -3935,8 +3935,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i16_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
@@ -4033,7 +4033,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i16_mm:
+; AVX2-LABEL: utest_f16i16_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
@@ -4050,7 +4050,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i16_mm:
+; AVX512-LABEL: utest_f16i16_mm:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0
@@ -4820,8 +4820,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i64_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
@@ -4847,7 +4847,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i64_mm:
+; AVX2-LABEL: utest_f16i64_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %rbx
@@ -4872,7 +4872,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
; AVX2-NEXT: popq %r14
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i64_mm:
+; AVX512-LABEL: utest_f16i64_mm:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
@@ -4974,6 +4974,63 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non saturate
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) nounwind {
+; SSE-LABEL: ustest_f16i32_nsat:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: subq $72, %rsp
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
+; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
+; SSE-NEXT: psrlq $48, %xmm0
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: cvttps2dq %xmm1, %xmm0
+; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm2
+; SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: addq $72, %rsp
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ustest_f16i32_nsat:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
index ecd9435..1766b4d 100644
--- a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
+++ b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
@@ -58,7 +58,7 @@ define <8 x float> @foo8(<8 x float> %v, ptr%p) nounwind {
define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -68,7 +68,7 @@ define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask2:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -78,7 +78,7 @@ define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask3:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 3>
@@ -88,7 +88,7 @@ define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask4(<4 x i32> %v, ptr %p) nounwind {
; AVX2-LABEL: undef_splatmask4:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,2,2]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX2-NEXT: vmovaps %xmm0, (%rdi)
; AVX2-NEXT: vmovaps %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
index 209d6a5..93a692c 100644
--- a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
@@ -1911,13 +1911,13 @@ define <2 x i64> @test_v2f64_ogt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmoval %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmoval %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2031,13 +2031,13 @@ define <2 x i64> @test_v2f64_oge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovael %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmovael %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2151,13 +2151,13 @@ define <2 x i64> @test_v2f64_olt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmoval %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmoval %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2269,13 +2269,13 @@ define <2 x i64> @test_v2f64_ole_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovael %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmovael %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2680,13 +2680,13 @@ define <2 x i64> @test_v2f64_ugt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbl %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmovbl %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2798,13 +2798,13 @@ define <2 x i64> @test_v2f64_uge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbel %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmovbel %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2916,13 +2916,13 @@ define <2 x i64> @test_v2f64_ult_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbl %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmovbl %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -3036,13 +3036,13 @@ define <2 x i64> @test_v2f64_ule_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbel %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmovbel %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
index 9ecc629..b378dce 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
@@ -162,7 +162,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pslld $23, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -182,7 +182,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: pslld $23, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -200,7 +200,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
@@ -277,7 +277,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
@@ -289,7 +289,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE2-NEXT: pslld $23, %xmm1
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
index 322ebe2..06ff7e7 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
@@ -250,7 +250,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pandn %xmm4, %xmm5
@@ -286,7 +286,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pandn %xmm3, %xmm4
@@ -316,7 +316,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -423,7 +423,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
@@ -450,7 +450,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
; X86-SSE2-NEXT: pandn %xmm4, %xmm5
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
index 178c02f..ef5ffe4 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
@@ -172,7 +172,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
; SSE2-NEXT: pslld $23, %xmm2
@@ -194,7 +194,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: psubd %xmm1, %xmm2
; SSE41-NEXT: pslld $23, %xmm2
@@ -214,7 +214,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
@@ -293,7 +293,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
@@ -309,7 +309,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
index 372deb05..2d8670a 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
@@ -251,7 +251,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pand %xmm4, %xmm5
@@ -287,7 +287,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pand %xmm3, %xmm4
@@ -317,7 +317,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -425,7 +425,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
@@ -452,7 +452,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
; X86-SSE2-NEXT: pand %xmm4, %xmm5
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index f57efb4..1e11ea9 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1409,11 +1409,11 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) {
; X86-SSE2-NEXT: movzbl %al, %eax
; X86-SSE2-NEXT: negl %eax
; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; X86-SSE2-NEXT: andl $1, %ecx
; X86-SSE2-NEXT: negl %ecx
; X86-SSE2-NEXT: movd %ecx, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index bd1a48b..7b0f1c9 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2555,7 +2555,7 @@ entry:
define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
; SSE2-LABEL: splatshuf_zext_v4i64:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -2563,7 +2563,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
;
; SSSE3-LABEL: splatshuf_zext_v4i64:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
@@ -2571,7 +2571,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
;
; SSE41-LABEL: splatshuf_zext_v4i64:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 19a31a6..31ed745 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 16(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: paddb (%rdx), %xmm0
; SSE2-NEXT: movdqa %xmm0, (%rcx)
@@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 32(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
@@ -4610,7 +4610,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 48(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pxor %xmm1, %xmm1
@@ -6544,7 +6544,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movdqa 16(%rdx), %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 239472c..5b4cdd2 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: paddb (%rsi), %xmm1
; SSE2-NEXT: movdqa %xmm1, (%rdx)
@@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
@@ -3660,7 +3660,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: pxor %xmm0, %xmm0
@@ -5250,7 +5250,7 @@ define void @vec512_i16_widen_to_i256_factor16_broadcast_to_v2i256_factor2(ptr %
define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.elt.ptr, ptr %out.vec.bias.ptr, ptr %out.vec.ptr) nounwind {
; SSE2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movdqa 16(%rsi), %xmm1