path: root/llvm/test/CodeGen/X86
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--  llvm/test/CodeGen/X86/avx2-vector-shifts.ll           12
-rw-r--r--  llvm/test/CodeGen/X86/gfni-shifts.ll                 168
-rw-r--r--  llvm/test/CodeGen/X86/isel-fpclass.ll                256
-rw-r--r--  llvm/test/CodeGen/X86/isel-smax.ll                   244
-rw-r--r--  llvm/test/CodeGen/X86/isel-smin.ll                   244
-rw-r--r--  llvm/test/CodeGen/X86/isel-umax.ll                   244
-rw-r--r--  llvm/test/CodeGen/X86/isel-umin.ll                   244
-rw-r--r--  llvm/test/CodeGen/X86/logic-shift.ll                  36
-rw-r--r--  llvm/test/CodeGen/X86/pr161693.ll                     40
-rw-r--r--  llvm/test/CodeGen/X86/prefer-avx256-shift.ll          36
-rw-r--r--  llvm/test/CodeGen/X86/shuffle-as-shifts.ll            54
-rw-r--r--  llvm/test/CodeGen/X86/sshl_sat_vec.ll                 12
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-128.ll              16
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-256.ll              34
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-128.ll        28
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-256.ll       164
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-512.ll        36
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll     68
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-lshr-128.ll        16
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-lshr-256.ll        68
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll     32
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll       16
22 files changed, 1208 insertions(+), 860 deletions(-)
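
Note: the shift-lowering updates below share one pattern. The blend mask of the variable-shift ladder was previously doubled twice with two dependent vpaddw instructions; the first doubling now writes a fresh register and the second is computed directly from the original mask with a single vpsllw $2, shortening the dependency chain. A minimal scalar sketch of the equivalence, in C (the lane value is hypothetical):

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint16_t m = 0x1234;                   /* one 16-bit lane of the blend mask */
    uint16_t m2 = (uint16_t)(m + m);       /* vpaddw m,m: mask doubled once */
    uint16_t m4_old = (uint16_t)(m2 + m2); /* old: second vpaddw, depends on m2 */
    uint16_t m4_new = (uint16_t)(m << 2);  /* new: vpsllw $2 straight from m */
    assert(m4_old == m4_new);              /* (x+x)+(x+x) == x<<2, wraparound included */
    return 0;
}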
diff --git a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
index 983c69d..95c2eda 100644
--- a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -441,10 +441,10 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-NEXT: vpsraw $4, %ymm3, %ymm4
; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; CHECK-NEXT: vpsraw $2, %ymm3, %ymm4
-; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; CHECK-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; CHECK-NEXT: vpsraw $1, %ymm3, %ymm4
-; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpsllw $2, %ymm2, %ymm2
; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; CHECK-NEXT: vpsrlw $8, %ymm2, %ymm2
; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -452,10 +452,10 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-NEXT: vpsraw $4, %ymm0, %ymm3
; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT: vpsraw $2, %ymm0, %ymm3
-; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; CHECK-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; CHECK-NEXT: vpsraw $1, %ymm0, %ymm3
-; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vpsllw $2, %ymm1, %ymm1
; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0
; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/gfni-shifts.ll b/llvm/test/CodeGen/X86/gfni-shifts.ll
index cd16651..feac3dc 100644
--- a/llvm/test/CodeGen/X86/gfni-shifts.ll
+++ b/llvm/test/CodeGen/X86/gfni-shifts.ll
@@ -166,10 +166,10 @@ define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; GFNIAVX1OR2-NEXT: vpsraw $4, %xmm3, %xmm4
; GFNIAVX1OR2-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
; GFNIAVX1OR2-NEXT: vpsraw $2, %xmm3, %xmm4
-; GFNIAVX1OR2-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; GFNIAVX1OR2-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; GFNIAVX1OR2-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; GFNIAVX1OR2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; GFNIAVX1OR2-NEXT: vpsraw $1, %xmm3, %xmm4
-; GFNIAVX1OR2-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; GFNIAVX1OR2-NEXT: vpsllw $2, %xmm2, %xmm2
; GFNIAVX1OR2-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm2, %xmm2
; GFNIAVX1OR2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -177,10 +177,10 @@ define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; GFNIAVX1OR2-NEXT: vpsraw $4, %xmm0, %xmm3
; GFNIAVX1OR2-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpsraw $2, %xmm0, %xmm3
-; GFNIAVX1OR2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; GFNIAVX1OR2-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; GFNIAVX1OR2-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; GFNIAVX1OR2-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpsraw $1, %xmm0, %xmm3
-; GFNIAVX1OR2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; GFNIAVX1OR2-NEXT: vpsllw $2, %xmm1, %xmm1
; GFNIAVX1OR2-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
@@ -896,10 +896,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm5, %xmm6
; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpsraw $2, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm7
+; GFNIAVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
; GFNIAVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -907,10 +907,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm6
+; GFNIAVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpsllw $2, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
; GFNIAVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
@@ -920,10 +920,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm6
+; GFNIAVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
; GFNIAVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -931,10 +931,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm0, %xmm4
; GFNIAVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpsraw $2, %xmm0, %xmm4
-; GFNIAVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; GFNIAVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm5
+; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpsraw $1, %xmm0, %xmm4
-; GFNIAVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; GFNIAVX1-NEXT: vpsllw $2, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
@@ -949,10 +949,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; GFNIAVX2-NEXT: vpsraw $4, %ymm3, %ymm4
; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; GFNIAVX2-NEXT: vpsraw $2, %ymm3, %ymm4
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; GFNIAVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; GFNIAVX2-NEXT: vpsraw $1, %ymm3, %ymm4
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX2-NEXT: vpsllw $2, %ymm2, %ymm2
; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; GFNIAVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -960,10 +960,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; GFNIAVX2-NEXT: vpsraw $4, %ymm0, %ymm3
; GFNIAVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsraw $2, %ymm0, %ymm3
-; GFNIAVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; GFNIAVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; GFNIAVX2-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsraw $1, %ymm0, %ymm3
-; GFNIAVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; GFNIAVX2-NEXT: vpsllw $2, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -977,10 +977,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; GFNIAVX512VL-NEXT: vpsraw $4, %ymm3, %ymm4
; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; GFNIAVX512VL-NEXT: vpsraw $2, %ymm3, %ymm4
-; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; GFNIAVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; GFNIAVX512VL-NEXT: vpsraw $1, %ymm3, %ymm4
-; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm2, %ymm2
; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -988,10 +988,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; GFNIAVX512VL-NEXT: vpsraw $4, %ymm0, %ymm3
; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; GFNIAVX512VL-NEXT: vpsraw $2, %ymm0, %ymm3
-; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; GFNIAVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; GFNIAVX512VL-NEXT: vpsraw $1, %ymm0, %ymm3
-; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm1, %ymm1
; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; GFNIAVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -2027,10 +2027,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm7, %xmm8
; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm7
; GFNIAVX1-NEXT: vpsraw $2, %xmm7, %xmm8
-; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
-; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm7
+; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm9
+; GFNIAVX1-NEXT: vpblendvb %xmm9, %xmm8, %xmm7, %xmm7
; GFNIAVX1-NEXT: vpsraw $1, %xmm7, %xmm8
-; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpsllw $2, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm5
; GFNIAVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2038,10 +2038,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm6, %xmm7
; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm6
; GFNIAVX1-NEXT: vpsraw $2, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm6
+; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm8
+; GFNIAVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm6, %xmm6
; GFNIAVX1-NEXT: vpsraw $1, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpsllw $2, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm4
; GFNIAVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4
@@ -2051,10 +2051,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm6, %xmm7
; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm7, %xmm6, %xmm6
; GFNIAVX1-NEXT: vpsraw $2, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
-; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm7, %xmm6, %xmm6
+; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm8
+; GFNIAVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm6, %xmm6
; GFNIAVX1-NEXT: vpsraw $1, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpsllw $2, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm7, %xmm6, %xmm5
; GFNIAVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2062,10 +2062,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm0, %xmm6
; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpsraw $2, %xmm0, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm0, %xmm0
+; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm7
+; GFNIAVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpsraw $1, %xmm0, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpsllw $2, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpackuswb %xmm5, %xmm0, %xmm0
@@ -2078,10 +2078,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm6, %xmm7
; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm6
; GFNIAVX1-NEXT: vpsraw $2, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm6
+; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm8
+; GFNIAVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm6, %xmm6
; GFNIAVX1-NEXT: vpsraw $1, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpsllw $2, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm4
; GFNIAVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2089,10 +2089,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm5, %xmm6
; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpsraw $2, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm7
+; GFNIAVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpsllw $2, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm5, %xmm2
; GFNIAVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
@@ -2102,10 +2102,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm5, %xmm6
; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpsraw $2, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm7
+; GFNIAVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
; GFNIAVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpsllw $2, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm5, %xmm4
; GFNIAVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2113,10 +2113,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX1-NEXT: vpsraw $4, %xmm1, %xmm5
; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpsraw $2, %xmm1, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm1, %xmm1
+; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm6
+; GFNIAVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpsraw $1, %xmm1, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpackuswb %xmm4, %xmm1, %xmm1
@@ -2131,10 +2131,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX2-NEXT: vpsraw $4, %ymm5, %ymm6
; GFNIAVX2-NEXT: vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
; GFNIAVX2-NEXT: vpsraw $2, %ymm5, %ymm6
-; GFNIAVX2-NEXT: vpaddw %ymm4, %ymm4, %ymm4
-; GFNIAVX2-NEXT: vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
+; GFNIAVX2-NEXT: vpaddw %ymm4, %ymm4, %ymm7
+; GFNIAVX2-NEXT: vpblendvb %ymm7, %ymm6, %ymm5, %ymm5
; GFNIAVX2-NEXT: vpsraw $1, %ymm5, %ymm6
-; GFNIAVX2-NEXT: vpaddw %ymm4, %ymm4, %ymm4
+; GFNIAVX2-NEXT: vpsllw $2, %ymm4, %ymm4
; GFNIAVX2-NEXT: vpblendvb %ymm4, %ymm6, %ymm5, %ymm4
; GFNIAVX2-NEXT: vpsrlw $8, %ymm4, %ymm4
; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -2142,10 +2142,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX2-NEXT: vpsraw $4, %ymm0, %ymm5
; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsraw $2, %ymm0, %ymm5
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm6
+; GFNIAVX2-NEXT: vpblendvb %ymm6, %ymm5, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsraw $1, %ymm0, %ymm5
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX2-NEXT: vpsllw $2, %ymm2, %ymm2
; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
@@ -2155,10 +2155,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX2-NEXT: vpsraw $4, %ymm4, %ymm5
; GFNIAVX2-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; GFNIAVX2-NEXT: vpsraw $2, %ymm4, %ymm5
-; GFNIAVX2-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; GFNIAVX2-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; GFNIAVX2-NEXT: vpaddw %ymm3, %ymm3, %ymm6
+; GFNIAVX2-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
; GFNIAVX2-NEXT: vpsraw $1, %ymm4, %ymm5
-; GFNIAVX2-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; GFNIAVX2-NEXT: vpsllw $2, %ymm3, %ymm3
; GFNIAVX2-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; GFNIAVX2-NEXT: vpsrlw $8, %ymm3, %ymm3
; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -2166,10 +2166,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX2-NEXT: vpsraw $4, %ymm1, %ymm4
; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpsraw $2, %ymm1, %ymm4
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
+; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; GFNIAVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpsraw $1, %ymm1, %ymm4
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX2-NEXT: vpsllw $2, %ymm2, %ymm2
; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
@@ -2185,10 +2185,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX512VL-NEXT: vpsraw $4, %ymm5, %ymm6
; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
; GFNIAVX512VL-NEXT: vpsraw $2, %ymm5, %ymm6
-; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
+; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm7
+; GFNIAVX512VL-NEXT: vpblendvb %ymm7, %ymm6, %ymm5, %ymm5
; GFNIAVX512VL-NEXT: vpsraw $1, %ymm5, %ymm6
-; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm3, %ymm3
; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm3
; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -2196,10 +2196,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX512VL-NEXT: vpsraw $4, %ymm4, %ymm5
; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
; GFNIAVX512VL-NEXT: vpsraw $2, %ymm4, %ymm5
-; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
+; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm6
+; GFNIAVX512VL-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
; GFNIAVX512VL-NEXT: vpsraw $1, %ymm4, %ymm5
-; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm2, %ymm2
; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm2
; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
; GFNIAVX512VL-NEXT: vpackuswb %ymm3, %ymm2, %ymm2
@@ -2209,10 +2209,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX512VL-NEXT: vpsraw $4, %ymm4, %ymm5
; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; GFNIAVX512VL-NEXT: vpsraw $2, %ymm4, %ymm5
-; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm6
+; GFNIAVX512VL-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
; GFNIAVX512VL-NEXT: vpsraw $1, %ymm4, %ymm5
-; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm3, %ymm3
; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -2220,10 +2220,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX512VL-NEXT: vpsraw $4, %ymm0, %ymm4
; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; GFNIAVX512VL-NEXT: vpsraw $2, %ymm0, %ymm4
-; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
+; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm5
+; GFNIAVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm0, %ymm0
; GFNIAVX512VL-NEXT: vpsraw $1, %ymm0, %ymm4
-; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm1, %ymm1
; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; GFNIAVX512VL-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
@@ -2239,11 +2239,11 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; GFNIAVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3
-; GFNIAVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
-; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; GFNIAVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm5
+; GFNIAVX512BW-NEXT: vpmovb2m %zmm5, %k1
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; GFNIAVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3
-; GFNIAVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; GFNIAVX512BW-NEXT: vpsllw $2, %zmm4, %zmm4
; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
@@ -2253,11 +2253,11 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; GFNIAVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3
-; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
-; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm4
+; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; GFNIAVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3
-; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
+; GFNIAVX512BW-NEXT: vpsllw $2, %zmm1, %zmm1
; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
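
Note: for context, the gfni-shifts.ll cases above exercise the usual per-byte variable arithmetic-shift ladder: the shift amount's bits are moved to the top of a mask, each step conditionally applies a shift by 4, then 2, then 1 (vpblendvb selects on the mask's sign bit), and the mask is doubled between steps — the doubling that the new code performs as vpsllw $2 where both vpaddw steps would otherwise chain. A scalar model of one byte lane, in C; it assumes arithmetic right shift of signed values (true of mainstream compilers), and the helper name is illustrative:

#include <assert.h>
#include <stdint.h>

/* Scalar model of one byte lane of the var_ashr tests: the 3-bit
   shift amount sits in the mask's top bits; each step tests the
   sign bit (what vpblendvb selects on) and doubles the mask. */
static int8_t var_ashr_i8(int8_t v, uint8_t amt) {
    uint8_t m = (uint8_t)(amt << 5); /* amount bits 2..0 -> mask bits 7..5 */
    if (m & 0x80) v >>= 4;           /* vpsraw $4 + vpblendvb */
    m = (uint8_t)(m << 1);           /* first doubling: vpaddw m,m */
    if (m & 0x80) v >>= 2;           /* vpsraw $2 + vpblendvb */
    m = (uint8_t)(m << 1);           /* second doubling: now vpsllw $2 of the original */
    if (m & 0x80) v >>= 1;           /* vpsraw $1 + vpblendvb */
    return v;
}

int main(void) {
    for (int a = 0; a < 8; a++)
        assert(var_ashr_i8(-128, (uint8_t)a) == (int8_t)(-128 >> a));
    return 0;
}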
diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll
index 960bbf5..df04b67 100644
--- a/llvm/test/CodeGen/X86/isel-fpclass.ll
+++ b/llvm/test/CodeGen/X86/isel-fpclass.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86-SDAGISEL
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL
; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL
; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL
+; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X64,X64-GISEL
-; FIXME: We can reuse/delete llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included.
-
-define i1 @isnone_f(float %x) {
-; X86-SDAGISEL-LABEL: isnone_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: xorl %eax, %eax
-; X86-SDAGISEL-NEXT: retl
+define i1 @isnone_f(float %x) nounwind {
+; X86-LABEL: isnone_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
;
; X64-LABEL: isnone_f:
; X64: # %bb.0: # %entry
@@ -28,11 +28,11 @@ entry:
ret i1 %0
}
-define i1 @isany_f(float %x) {
-; X86-SDAGISEL-LABEL: isany_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movb $1, %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @isany_f(float %x) nounwind {
+; X86-LABEL: isany_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
;
; X64-LABEL: isany_f:
; X64: # %bb.0: # %entry
@@ -50,17 +50,17 @@ entry:
ret i1 %0
}
-define i1 @issignaling_f(float %x) {
-; X86-SDAGISEL-LABEL: issignaling_f:
-; X86-SDAGISEL: # %bb.0:
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X86-SDAGISEL-NEXT: setl %cl
-; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X86-SDAGISEL-NEXT: setge %al
-; X86-SDAGISEL-NEXT: andb %cl, %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @issignaling_f(float %x) nounwind {
+; X86-LABEL: issignaling_f:
+; X86: # %bb.0:
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-NEXT: setl %cl
+; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-NEXT: setge %al
+; X86-NEXT: andb %cl, %al
+; X86-NEXT: retl
;
; X64-LABEL: issignaling_f:
; X64: # %bb.0:
@@ -76,7 +76,6 @@ define i1 @issignaling_f(float %x) {
; X86-FASTISEL-LABEL: issignaling_f:
; X86-FASTISEL: # %bb.0:
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -87,20 +86,19 @@ define i1 @issignaling_f(float %x) {
; X86-FASTISEL-NEXT: setge %al
; X86-FASTISEL-NEXT: andb %cl, %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
%a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan"
ret i1 %a0
}
- define i1 @isquiet_f(float %x) {
-; X86-SDAGISEL-LABEL: isquiet_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X86-SDAGISEL-NEXT: setge %al
-; X86-SDAGISEL-NEXT: retl
+ define i1 @isquiet_f(float %x) nounwind {
+; X86-LABEL: isquiet_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-NEXT: setge %al
+; X86-NEXT: retl
;
; X64-LABEL: isquiet_f:
; X64: # %bb.0: # %entry
@@ -113,7 +111,6 @@ define i1 @issignaling_f(float %x) {
; X86-FASTISEL-LABEL: isquiet_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -121,21 +118,20 @@ define i1 @issignaling_f(float %x) {
; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
; X86-FASTISEL-NEXT: setge %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan"
ret i1 %0
}
-define i1 @not_isquiet_f(float %x) {
-; X86-SDAGISEL-LABEL: not_isquiet_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X86-SDAGISEL-NEXT: setl %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @not_isquiet_f(float %x) nounwind {
+; X86-LABEL: not_isquiet_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-NEXT: setl %al
+; X86-NEXT: retl
;
; X64-LABEL: not_isquiet_f:
; X64: # %bb.0: # %entry
@@ -148,7 +144,6 @@ define i1 @not_isquiet_f(float %x) {
; X86-FASTISEL-LABEL: not_isquiet_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -156,21 +151,20 @@ define i1 @not_isquiet_f(float %x) {
; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
; X86-FASTISEL-NEXT: setl %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan"
ret i1 %0
}
-define i1 @isinf_f(float %x) {
-; X86-SDAGISEL-LABEL: isinf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: sete %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @isinf_f(float %x) nounwind {
+; X86-LABEL: isinf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
;
; X64-LABEL: isinf_f:
; X64: # %bb.0: # %entry
@@ -183,7 +177,6 @@ define i1 @isinf_f(float %x) {
; X86-FASTISEL-LABEL: isinf_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -191,21 +184,20 @@ define i1 @isinf_f(float %x) {
; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
; X86-FASTISEL-NEXT: sete %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
ret i1 %0
}
-define i1 @not_isinf_f(float %x) {
-; X86-SDAGISEL-LABEL: not_isinf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: setne %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @not_isinf_f(float %x) nounwind {
+; X86-LABEL: not_isinf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
; X64-LABEL: not_isinf_f:
; X64: # %bb.0: # %entry
@@ -218,7 +210,6 @@ define i1 @not_isinf_f(float %x) {
; X86-FASTISEL-LABEL: not_isinf_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -226,19 +217,18 @@ define i1 @not_isinf_f(float %x) {
; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
; X86-FASTISEL-NEXT: setne %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf"
ret i1 %0
}
-define i1 @is_plus_inf_f(float %x) {
-; X86-SDAGISEL-LABEL: is_plus_inf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: sete %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @is_plus_inf_f(float %x) nounwind {
+; X86-LABEL: is_plus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
;
; X64-LABEL: is_plus_inf_f:
; X64: # %bb.0: # %entry
@@ -250,25 +240,23 @@ define i1 @is_plus_inf_f(float %x) {
; X86-FASTISEL-LABEL: is_plus_inf_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
; X86-FASTISEL-NEXT: sete %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
ret i1 %0
}
-define i1 @is_minus_inf_f(float %x) {
-; X86-SDAGISEL-LABEL: is_minus_inf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
-; X86-SDAGISEL-NEXT: sete %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @is_minus_inf_f(float %x) nounwind {
+; X86-LABEL: is_minus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
;
; X64-LABEL: is_minus_inf_f:
; X64: # %bb.0: # %entry
@@ -280,25 +268,23 @@ define i1 @is_minus_inf_f(float %x) {
; X86-FASTISEL-LABEL: is_minus_inf_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
; X86-FASTISEL-NEXT: sete %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
ret i1 %0
}
-define i1 @not_is_minus_inf_f(float %x) {
-; X86-SDAGISEL-LABEL: not_is_minus_inf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
-; X86-SDAGISEL-NEXT: setne %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @not_is_minus_inf_f(float %x) nounwind {
+; X86-LABEL: not_is_minus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-NEXT: setne %al
+; X86-NEXT: retl
;
; X64-LABEL: not_is_minus_inf_f:
; X64: # %bb.0: # %entry
@@ -310,27 +296,25 @@ define i1 @not_is_minus_inf_f(float %x) {
; X86-FASTISEL-LABEL: not_is_minus_inf_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
; X86-FASTISEL-NEXT: setne %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf"
ret i1 %0
}
-define i1 @isfinite_f(float %x) {
-; X86-SDAGISEL-LABEL: isfinite_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: setl %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @isfinite_f(float %x) nounwind {
+; X86-LABEL: isfinite_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT: setl %al
+; X86-NEXT: retl
;
; X64-LABEL: isfinite_f:
; X64: # %bb.0: # %entry
@@ -343,7 +327,6 @@ define i1 @isfinite_f(float %x) {
; X86-FASTISEL-LABEL: isfinite_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -351,21 +334,20 @@ define i1 @isfinite_f(float %x) {
; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
; X86-FASTISEL-NEXT: setl %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
ret i1 %0
}
-define i1 @not_isfinite_f(float %x) {
-; X86-SDAGISEL-LABEL: not_isfinite_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: setge %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @not_isfinite_f(float %x) nounwind {
+; X86-LABEL: not_isfinite_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT: setge %al
+; X86-NEXT: retl
;
; X64-LABEL: not_isfinite_f:
; X64: # %bb.0: # %entry
@@ -378,7 +360,6 @@ define i1 @not_isfinite_f(float %x) {
; X86-FASTISEL-LABEL: not_isfinite_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -386,19 +367,18 @@ define i1 @not_isfinite_f(float %x) {
; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
; X86-FASTISEL-NEXT: setge %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite"
ret i1 %0
}
-define i1 @is_plus_finite_f(float %x) {
-; X86-SDAGISEL-LABEL: is_plus_finite_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: setb %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @is_plus_finite_f(float %x) nounwind {
+; X86-LABEL: is_plus_finite_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-NEXT: setb %al
+; X86-NEXT: retl
;
; X64-LABEL: is_plus_finite_f:
; X64: # %bb.0: # %entry
@@ -410,13 +390,11 @@ define i1 @is_plus_finite_f(float %x) {
; X86-FASTISEL-LABEL: is_plus_finite_f:
; X86-FASTISEL: # %bb.0: # %entry
; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
; X86-FASTISEL-NEXT: fstps (%esp)
; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
; X86-FASTISEL-NEXT: setb %al
; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
; X86-FASTISEL-NEXT: retl
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite"
@@ -424,10 +402,10 @@ entry:
}
define i1 @isnone_d(double %x) nounwind {
-; X86-SDAGISEL-LABEL: isnone_d:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: xorl %eax, %eax
-; X86-SDAGISEL-NEXT: retl
+; X86-LABEL: isnone_d:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
;
; X64-LABEL: isnone_d:
; X64: # %bb.0: # %entry
@@ -446,10 +424,10 @@ entry:
}
define i1 @isany_d(double %x) nounwind {
-; X86-SDAGISEL-LABEL: isany_d:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movb $1, %al
-; X86-SDAGISEL-NEXT: retl
+; X86-LABEL: isany_d:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
;
; X64-LABEL: isany_d:
; X64: # %bb.0: # %entry
@@ -468,10 +446,10 @@ entry:
}
define i1 @isnone_f80(x86_fp80 %x) nounwind {
-; X86-SDAGISEL-LABEL: isnone_f80:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: xorl %eax, %eax
-; X86-SDAGISEL-NEXT: retl
+; X86-LABEL: isnone_f80:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
;
; X64-SDAGISEL-LABEL: isnone_f80:
; X64-SDAGISEL: # %bb.0: # %entry
@@ -491,16 +469,21 @@ define i1 @isnone_f80(x86_fp80 %x) nounwind {
; X64-FASTISEL-NEXT: fstp %st(0)
; X64-FASTISEL-NEXT: xorl %eax, %eax
; X64-FASTISEL-NEXT: retq
+;
+; X64-GISEL-LABEL: isnone_f80:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: xorl %eax, %eax
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 0)
ret i1 %0
}
define i1 @isany_f80(x86_fp80 %x) nounwind {
-; X86-SDAGISEL-LABEL: isany_f80:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movb $1, %al
-; X86-SDAGISEL-NEXT: retl
+; X86-LABEL: isany_f80:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
;
; X64-SDAGISEL-LABEL: isany_f80:
; X64-SDAGISEL: # %bb.0: # %entry
@@ -520,6 +503,11 @@ define i1 @isany_f80(x86_fp80 %x) nounwind {
; X64-FASTISEL-NEXT: fstp %st(0)
; X64-FASTISEL-NEXT: movb $1, %al
; X64-FASTISEL-NEXT: retq
+;
+; X64-GISEL-LABEL: isany_f80:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movb $1, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 1023)
ret i1 %0
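
Note: the is.fpclass lowerings above all reduce to integer tests on the float's bit pattern: the sign bit is masked off with 0x7FFFFFFF and the result is compared against 0x7F800000 (all-ones exponent, zero mantissa) for infinity, or against the 0x7FC00000 quiet-NaN boundary for the NaN classes. A scalar C sketch of the isinf_f test (the helper name is illustrative):

#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <string.h>

/* Scalar model of the isinf_f lowering: mask off the sign bit,
   then compare against 0x7F800000 (all-ones exponent, zero mantissa). */
static int isinf_bits(float x) {
    uint32_t bits;
    memcpy(&bits, &x, sizeof bits); /* reinterpret without UB */
    return (bits & 0x7FFFFFFFu) == 0x7F800000u;
}

int main(void) {
    assert(isinf_bits(INFINITY) && isinf_bits(-INFINITY));
    assert(!isinf_bits(1.0f) && !isinf_bits(NAN));
    return 0;
}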
diff --git a/llvm/test/CodeGen/X86/isel-smax.ll b/llvm/test/CodeGen/X86/isel-smax.ll
index 9c9a48e..1ce0a80 100644
--- a/llvm/test/CodeGen/X86/isel-smax.ll
+++ b/llvm/test/CodeGen/X86/isel-smax.ll
@@ -1,19 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86,FASTISEL-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
define i8 @smax_i8(i8 %a, i8 %b) nounwind readnone {
-; X64-LABEL: smax_i8:
-; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpb %al, %dil
-; X64-NEXT: cmovgl %edi, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
+; DAG-X64-LABEL: smax_i8:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: movl %esi, %eax
+; DAG-X64-NEXT: cmpb %al, %dil
+; DAG-X64-NEXT: cmovgl %edi, %eax
+; DAG-X64-NEXT: # kill: def $al killed $al killed $eax
+; DAG-X64-NEXT: retq
;
; FASTISEL-X64-LABEL: smax_i8:
; FASTISEL-X64: # %bb.0:
@@ -24,6 +24,17 @@ define i8 @smax_i8(i8 %a, i8 %b) nounwind readnone {
; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax
; FASTISEL-X64-NEXT: retq
;
+; GISEL-X64-LABEL: smax_i8:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %esi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpb %al, %dil
+; GISEL-X64-NEXT: setg %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovnew %di, %ax
+; GISEL-X64-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X64-NEXT: retq
+;
; X86-LABEL: smax_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
@@ -35,16 +46,20 @@ define i8 @smax_i8(i8 %a, i8 %b) nounwind readnone {
; X86-NEXT: .LBB0_2:
; X86-NEXT: retl
;
-; FASTISEL-X86-LABEL: smax_i8:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpb %cl, %al
-; FASTISEL-X86-NEXT: jg .LBB0_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB0_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: smax_i8:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpb %al, %cl
+; GISEL-X86-NEXT: setg %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB0_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB0_2:
+; GISEL-X86-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X86-NEXT: retl
%ret = call i8 @llvm.smax.i8(i8 %a, i8 %b)
ret i8 %ret
}
@@ -57,25 +72,28 @@ define i16 @smax_i16(i16 %a, i16 %b) nounwind readnone {
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: smax_i16:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpw %ax, %di
-; FASTISEL-X64-NEXT: cmovgl %edi, %eax
-; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smax_i16:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpw %si, %ax
+; GISEL-X64-NEXT: setg %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovew %si, %ax
+; GISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X64-NEXT: retq
;
-; X86-LABEL: smax_i16:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpw %cx, %ax
-; X86-NEXT: jg .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB1_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
+; DAG-X86-LABEL: smax_i16:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: cmpw %cx, %ax
+; DAG-X86-NEXT: jg .LBB1_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: .LBB1_2:
+; DAG-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; DAG-X86-NEXT: retl
;
; FASTISEL-X86-LABEL: smax_i16:
; FASTISEL-X86: # %bb.0:
@@ -88,6 +106,21 @@ define i16 @smax_i16(i16 %a, i16 %b) nounwind readnone {
; FASTISEL-X86-NEXT: .LBB1_2:
; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: smax_i16:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpw %ax, %cx
+; GISEL-X86-NEXT: setg %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB1_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB1_2:
+; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X86-NEXT: retl
%ret = call i16 @llvm.smax.i16(i16 %a, i16 %b)
ret i16 %ret
}
@@ -99,12 +132,15 @@ define i32 @smax_i32(i32 %a, i32 %b) nounwind readnone {
; X64-NEXT: cmovgl %edi, %eax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: smax_i32:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpl %esi, %edi
-; FASTISEL-X64-NEXT: cmovgl %edi, %eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smax_i32:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpl %esi, %edi
+; GISEL-X64-NEXT: setg %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovel %esi, %eax
+; GISEL-X64-NEXT: retq
;
; X86-LABEL: smax_i32:
; X86: # %bb.0:
@@ -117,16 +153,19 @@ define i32 @smax_i32(i32 %a, i32 %b) nounwind readnone {
; X86-NEXT: .LBB2_2:
; X86-NEXT: retl
;
-; FASTISEL-X86-LABEL: smax_i32:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpl %ecx, %eax
-; FASTISEL-X86-NEXT: jg .LBB2_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB2_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: smax_i32:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpl %eax, %ecx
+; GISEL-X86-NEXT: setg %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB2_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB2_2:
+; GISEL-X86-NEXT: retl
%ret = call i32 @llvm.smax.i32(i32 %a, i32 %b)
ret i32 %ret
}
@@ -138,32 +177,35 @@ define i64 @smax_i64(i64 %a, i64 %b) nounwind readnone {
; X64-NEXT: cmovgq %rdi, %rax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: smax_i64:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movq %rsi, %rax
-; FASTISEL-X64-NEXT: cmpq %rsi, %rdi
-; FASTISEL-X64-NEXT: cmovgq %rdi, %rax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smax_i64:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movq %rdi, %rax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpq %rsi, %rdi
+; GISEL-X64-NEXT: setg %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmoveq %rsi, %rax
+; GISEL-X64-NEXT: retq
;
-; X86-LABEL: smax_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: jl .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: .LBB3_2:
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: retl
+; DAG-X86-LABEL: smax_i64:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: pushl %edi
+; DAG-X86-NEXT: pushl %esi
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; DAG-X86-NEXT: cmpl %eax, %ecx
+; DAG-X86-NEXT: movl %esi, %edi
+; DAG-X86-NEXT: sbbl %edx, %edi
+; DAG-X86-NEXT: jl .LBB3_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: movl %esi, %edx
+; DAG-X86-NEXT: .LBB3_2:
+; DAG-X86-NEXT: popl %esi
+; DAG-X86-NEXT: popl %edi
+; DAG-X86-NEXT: retl
;
; FASTISEL-X86-LABEL: smax_i64:
; FASTISEL-X86: # %bb.0:
@@ -184,6 +226,44 @@ define i64 @smax_i64(i64 %a, i64 %b) nounwind readnone {
; FASTISEL-X86-NEXT: popl %esi
; FASTISEL-X86-NEXT: popl %edi
; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: smax_i64:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: pushl %ebp
+; GISEL-X86-NEXT: pushl %ebx
+; GISEL-X86-NEXT: pushl %edi
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: cmpl %eax, %esi
+; GISEL-X86-NEXT: seta %bl
+; GISEL-X86-NEXT: xorl %ecx, %ecx
+; GISEL-X86-NEXT: cmpl %edx, %ebp
+; GISEL-X86-NEXT: setg %bh
+; GISEL-X86-NEXT: sete %cl
+; GISEL-X86-NEXT: testl %ecx, %ecx
+; GISEL-X86-NEXT: je .LBB3_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movb %bl, %bh
+; GISEL-X86-NEXT: .LBB3_2:
+; GISEL-X86-NEXT: movzbl %bh, %edi
+; GISEL-X86-NEXT: andl $1, %edi
+; GISEL-X86-NEXT: je .LBB3_4
+; GISEL-X86-NEXT: # %bb.3:
+; GISEL-X86-NEXT: movl %esi, %eax
+; GISEL-X86-NEXT: .LBB3_4:
+; GISEL-X86-NEXT: testl %edi, %edi
+; GISEL-X86-NEXT: je .LBB3_6
+; GISEL-X86-NEXT: # %bb.5:
+; GISEL-X86-NEXT: movl %ebp, %edx
+; GISEL-X86-NEXT: .LBB3_6:
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: popl %edi
+; GISEL-X86-NEXT: popl %ebx
+; GISEL-X86-NEXT: popl %ebp
+; GISEL-X86-NEXT: retl
%ret = call i64 @llvm.smax.i64(i64 %a, i64 %b)
ret i64 %ret
}
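
Note: the GISEL-X64 sequences above show GlobalISel selecting llvm.smax as a generic compare-plus-select rather than the fused cmp/cmov that SelectionDAG emits: the i1 is materialized with setg, widened with andl $1, then consumed by a conditional move (or a branch over a mov on i686). A scalar C model of that shape (illustrative only):

#include <assert.h>
#include <stdint.h>

/* Scalar model of the GISEL-X64 smax_i8 shape: the compare result is
   materialized as an i1 (setg), widened (andl $1), then selected on. */
static int8_t smax_i8(int8_t a, int8_t b) {
    uint32_t cond = (a > b) ? 1u : 0u; /* cmpb + setg + andl $1 */
    return cond ? a : b;               /* cmovnew on x86-64, je-over-mov on i686 */
}

int main(void) {
    assert(smax_i8(3, -5) == 3);
    assert(smax_i8(-5, 3) == 3);
    assert(smax_i8(-7, -7) == -7);
    return 0;
}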
diff --git a/llvm/test/CodeGen/X86/isel-smin.ll b/llvm/test/CodeGen/X86/isel-smin.ll
index 7349a7c..bbed3c3 100644
--- a/llvm/test/CodeGen/X86/isel-smin.ll
+++ b/llvm/test/CodeGen/X86/isel-smin.ll
@@ -1,19 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86,FASTISEL-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
define i8 @smin_i8(i8 %a, i8 %b) nounwind readnone {
-; X64-LABEL: smin_i8:
-; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpb %al, %dil
-; X64-NEXT: cmovll %edi, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
+; DAG-X64-LABEL: smin_i8:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: movl %esi, %eax
+; DAG-X64-NEXT: cmpb %al, %dil
+; DAG-X64-NEXT: cmovll %edi, %eax
+; DAG-X64-NEXT: # kill: def $al killed $al killed $eax
+; DAG-X64-NEXT: retq
;
; FASTISEL-X64-LABEL: smin_i8:
; FASTISEL-X64: # %bb.0:
@@ -24,6 +24,17 @@ define i8 @smin_i8(i8 %a, i8 %b) nounwind readnone {
; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax
; FASTISEL-X64-NEXT: retq
;
+; GISEL-X64-LABEL: smin_i8:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %esi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpb %al, %dil
+; GISEL-X64-NEXT: setl %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovnew %di, %ax
+; GISEL-X64-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X64-NEXT: retq
+;
; X86-LABEL: smin_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
@@ -35,16 +46,20 @@ define i8 @smin_i8(i8 %a, i8 %b) nounwind readnone {
; X86-NEXT: .LBB0_2:
; X86-NEXT: retl
;
-; FASTISEL-X86-LABEL: smin_i8:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpb %cl, %al
-; FASTISEL-X86-NEXT: jl .LBB0_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB0_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: smin_i8:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpb %al, %cl
+; GISEL-X86-NEXT: setl %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB0_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB0_2:
+; GISEL-X86-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X86-NEXT: retl
%ret = call i8 @llvm.smin.i8(i8 %a, i8 %b)
ret i8 %ret
}
@@ -57,25 +72,28 @@ define i16 @smin_i16(i16 %a, i16 %b) nounwind readnone {
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: smin_i16:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpw %ax, %di
-; FASTISEL-X64-NEXT: cmovll %edi, %eax
-; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smin_i16:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpw %si, %ax
+; GISEL-X64-NEXT: setl %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovew %si, %ax
+; GISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X64-NEXT: retq
;
-; X86-LABEL: smin_i16:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpw %cx, %ax
-; X86-NEXT: jl .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB1_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
+; DAG-X86-LABEL: smin_i16:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: cmpw %cx, %ax
+; DAG-X86-NEXT: jl .LBB1_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: .LBB1_2:
+; DAG-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; DAG-X86-NEXT: retl
;
; FASTISEL-X86-LABEL: smin_i16:
; FASTISEL-X86: # %bb.0:
@@ -88,6 +106,21 @@ define i16 @smin_i16(i16 %a, i16 %b) nounwind readnone {
; FASTISEL-X86-NEXT: .LBB1_2:
; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: smin_i16:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpw %ax, %cx
+; GISEL-X86-NEXT: setl %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB1_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB1_2:
+; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X86-NEXT: retl
%ret = call i16 @llvm.smin.i16(i16 %a, i16 %b)
ret i16 %ret
}
@@ -99,12 +132,15 @@ define i32 @smin_i32(i32 %a, i32 %b) nounwind readnone {
; X64-NEXT: cmovll %edi, %eax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: smin_i32:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpl %esi, %edi
-; FASTISEL-X64-NEXT: cmovll %edi, %eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smin_i32:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpl %esi, %edi
+; GISEL-X64-NEXT: setl %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovel %esi, %eax
+; GISEL-X64-NEXT: retq
;
; X86-LABEL: smin_i32:
; X86: # %bb.0:
@@ -117,16 +153,19 @@ define i32 @smin_i32(i32 %a, i32 %b) nounwind readnone {
; X86-NEXT: .LBB2_2:
; X86-NEXT: retl
;
-; FASTISEL-X86-LABEL: smin_i32:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpl %ecx, %eax
-; FASTISEL-X86-NEXT: jl .LBB2_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB2_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: smin_i32:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpl %eax, %ecx
+; GISEL-X86-NEXT: setl %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB2_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB2_2:
+; GISEL-X86-NEXT: retl
%ret = call i32 @llvm.smin.i32(i32 %a, i32 %b)
ret i32 %ret
}
@@ -138,32 +177,35 @@ define i64 @smin_i64(i64 %a, i64 %b) nounwind readnone {
; X64-NEXT: cmovlq %rdi, %rax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: smin_i64:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movq %rsi, %rax
-; FASTISEL-X64-NEXT: cmpq %rsi, %rdi
-; FASTISEL-X64-NEXT: cmovlq %rdi, %rax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smin_i64:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movq %rdi, %rax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpq %rsi, %rdi
+; GISEL-X64-NEXT: setl %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmoveq %rsi, %rax
+; GISEL-X64-NEXT: retq
;
-; X86-LABEL: smin_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: jl .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: .LBB3_2:
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: retl
+; DAG-X86-LABEL: smin_i64:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: pushl %edi
+; DAG-X86-NEXT: pushl %esi
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; DAG-X86-NEXT: cmpl %ecx, %eax
+; DAG-X86-NEXT: movl %edx, %edi
+; DAG-X86-NEXT: sbbl %esi, %edi
+; DAG-X86-NEXT: jl .LBB3_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: movl %esi, %edx
+; DAG-X86-NEXT: .LBB3_2:
+; DAG-X86-NEXT: popl %esi
+; DAG-X86-NEXT: popl %edi
+; DAG-X86-NEXT: retl
;
; FASTISEL-X86-LABEL: smin_i64:
; FASTISEL-X86: # %bb.0:
@@ -184,6 +226,44 @@ define i64 @smin_i64(i64 %a, i64 %b) nounwind readnone {
; FASTISEL-X86-NEXT: popl %esi
; FASTISEL-X86-NEXT: popl %edi
; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: smin_i64:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: pushl %ebp
+; GISEL-X86-NEXT: pushl %ebx
+; GISEL-X86-NEXT: pushl %edi
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: cmpl %eax, %esi
+; GISEL-X86-NEXT: setb %bl
+; GISEL-X86-NEXT: xorl %ecx, %ecx
+; GISEL-X86-NEXT: cmpl %edx, %ebp
+; GISEL-X86-NEXT: setl %bh
+; GISEL-X86-NEXT: sete %cl
+; GISEL-X86-NEXT: testl %ecx, %ecx
+; GISEL-X86-NEXT: je .LBB3_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movb %bl, %bh
+; GISEL-X86-NEXT: .LBB3_2:
+; GISEL-X86-NEXT: movzbl %bh, %edi
+; GISEL-X86-NEXT: andl $1, %edi
+; GISEL-X86-NEXT: je .LBB3_4
+; GISEL-X86-NEXT: # %bb.3:
+; GISEL-X86-NEXT: movl %esi, %eax
+; GISEL-X86-NEXT: .LBB3_4:
+; GISEL-X86-NEXT: testl %edi, %edi
+; GISEL-X86-NEXT: je .LBB3_6
+; GISEL-X86-NEXT: # %bb.5:
+; GISEL-X86-NEXT: movl %ebp, %edx
+; GISEL-X86-NEXT: .LBB3_6:
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: popl %edi
+; GISEL-X86-NEXT: popl %ebx
+; GISEL-X86-NEXT: popl %ebp
+; GISEL-X86-NEXT: retl
%ret = call i64 @llvm.smin.i64(i64 %a, i64 %b)
ret i64 %ret
}
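The RUN-line change above switches the GlobalISel runs from -global-isel-abort=2 (fall back to SelectionDAG on failure) to -global-isel-abort=1 (hard error on fallback), so the new GISEL-X64/GISEL-X86 prefixes now check genuine GlobalISel output rather than DAG fallback; the same update recurs in the isel-umax.ll and isel-umin.ll diffs below. The setl/andl/cmov sequences follow from the intrinsic's documented semantics, a compare plus select. A minimal sketch of that equivalence (illustrative only, not part of the patch):

  define i32 @smin_expanded(i32 %a, i32 %b) {
    %c = icmp slt i32 %a, %b            ; GISEL-X64 materializes this as cmpl + setl
    %r = select i1 %c, i32 %a, i32 %b   ; then andl $1 + cmov picks the operand
    ret i32 %r
  }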
diff --git a/llvm/test/CodeGen/X86/isel-umax.ll b/llvm/test/CodeGen/X86/isel-umax.ll
index a90456c..990af26 100644
--- a/llvm/test/CodeGen/X86/isel-umax.ll
+++ b/llvm/test/CodeGen/X86/isel-umax.ll
@@ -1,19 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86,FASTISEL-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
define i8 @umax_i8(i8 %a, i8 %b) nounwind readnone {
-; X64-LABEL: umax_i8:
-; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpb %al, %dil
-; X64-NEXT: cmoval %edi, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
+; DAG-X64-LABEL: umax_i8:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: movl %esi, %eax
+; DAG-X64-NEXT: cmpb %al, %dil
+; DAG-X64-NEXT: cmoval %edi, %eax
+; DAG-X64-NEXT: # kill: def $al killed $al killed $eax
+; DAG-X64-NEXT: retq
;
; FASTISEL-X64-LABEL: umax_i8:
; FASTISEL-X64: # %bb.0:
@@ -24,6 +24,17 @@ define i8 @umax_i8(i8 %a, i8 %b) nounwind readnone {
; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax
; FASTISEL-X64-NEXT: retq
;
+; GISEL-X64-LABEL: umax_i8:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %esi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpb %al, %dil
+; GISEL-X64-NEXT: seta %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovnew %di, %ax
+; GISEL-X64-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X64-NEXT: retq
+;
; X86-LABEL: umax_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
@@ -35,16 +46,20 @@ define i8 @umax_i8(i8 %a, i8 %b) nounwind readnone {
; X86-NEXT: .LBB0_2:
; X86-NEXT: retl
;
-; FASTISEL-X86-LABEL: umax_i8:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpb %cl, %al
-; FASTISEL-X86-NEXT: ja .LBB0_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB0_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: umax_i8:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpb %al, %cl
+; GISEL-X86-NEXT: seta %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB0_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB0_2:
+; GISEL-X86-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X86-NEXT: retl
%ret = call i8 @llvm.umax.i8(i8 %a, i8 %b)
ret i8 %ret
}
@@ -57,25 +72,28 @@ define i16 @umax_i16(i16 %a, i16 %b) nounwind readnone {
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: umax_i16:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpw %ax, %di
-; FASTISEL-X64-NEXT: cmoval %edi, %eax
-; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: umax_i16:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpw %si, %ax
+; GISEL-X64-NEXT: seta %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovew %si, %ax
+; GISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X64-NEXT: retq
;
-; X86-LABEL: umax_i16:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpw %cx, %ax
-; X86-NEXT: ja .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB1_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
+; DAG-X86-LABEL: umax_i16:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: cmpw %cx, %ax
+; DAG-X86-NEXT: ja .LBB1_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: .LBB1_2:
+; DAG-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; DAG-X86-NEXT: retl
;
; FASTISEL-X86-LABEL: umax_i16:
; FASTISEL-X86: # %bb.0:
@@ -88,6 +106,21 @@ define i16 @umax_i16(i16 %a, i16 %b) nounwind readnone {
; FASTISEL-X86-NEXT: .LBB1_2:
; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: umax_i16:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpw %ax, %cx
+; GISEL-X86-NEXT: seta %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB1_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB1_2:
+; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X86-NEXT: retl
%ret = call i16 @llvm.umax.i16(i16 %a, i16 %b)
ret i16 %ret
}
@@ -99,12 +132,15 @@ define i32 @umax_i32(i32 %a, i32 %b) nounwind readnone {
; X64-NEXT: cmoval %edi, %eax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: umax_i32:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpl %esi, %edi
-; FASTISEL-X64-NEXT: cmoval %edi, %eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: umax_i32:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpl %esi, %edi
+; GISEL-X64-NEXT: seta %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovel %esi, %eax
+; GISEL-X64-NEXT: retq
;
; X86-LABEL: umax_i32:
; X86: # %bb.0:
@@ -117,16 +153,19 @@ define i32 @umax_i32(i32 %a, i32 %b) nounwind readnone {
; X86-NEXT: .LBB2_2:
; X86-NEXT: retl
;
-; FASTISEL-X86-LABEL: umax_i32:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpl %ecx, %eax
-; FASTISEL-X86-NEXT: ja .LBB2_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB2_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: umax_i32:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpl %eax, %ecx
+; GISEL-X86-NEXT: seta %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB2_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB2_2:
+; GISEL-X86-NEXT: retl
%ret = call i32 @llvm.umax.i32(i32 %a, i32 %b)
ret i32 %ret
}
@@ -138,32 +177,35 @@ define i64 @umax_i64(i64 %a, i64 %b) nounwind readnone {
; X64-NEXT: cmovaq %rdi, %rax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: umax_i64:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movq %rsi, %rax
-; FASTISEL-X64-NEXT: cmpq %rsi, %rdi
-; FASTISEL-X64-NEXT: cmovaq %rdi, %rax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: umax_i64:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movq %rdi, %rax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpq %rsi, %rdi
+; GISEL-X64-NEXT: seta %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmoveq %rsi, %rax
+; GISEL-X64-NEXT: retq
;
-; X86-LABEL: umax_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: jb .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: .LBB3_2:
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: retl
+; DAG-X86-LABEL: umax_i64:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: pushl %edi
+; DAG-X86-NEXT: pushl %esi
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; DAG-X86-NEXT: cmpl %eax, %ecx
+; DAG-X86-NEXT: movl %esi, %edi
+; DAG-X86-NEXT: sbbl %edx, %edi
+; DAG-X86-NEXT: jb .LBB3_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: movl %esi, %edx
+; DAG-X86-NEXT: .LBB3_2:
+; DAG-X86-NEXT: popl %esi
+; DAG-X86-NEXT: popl %edi
+; DAG-X86-NEXT: retl
;
; FASTISEL-X86-LABEL: umax_i64:
; FASTISEL-X86: # %bb.0:
@@ -184,6 +226,44 @@ define i64 @umax_i64(i64 %a, i64 %b) nounwind readnone {
; FASTISEL-X86-NEXT: popl %esi
; FASTISEL-X86-NEXT: popl %edi
; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: umax_i64:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: pushl %ebp
+; GISEL-X86-NEXT: pushl %ebx
+; GISEL-X86-NEXT: pushl %edi
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: cmpl %eax, %esi
+; GISEL-X86-NEXT: seta %bl
+; GISEL-X86-NEXT: xorl %ecx, %ecx
+; GISEL-X86-NEXT: cmpl %edx, %ebp
+; GISEL-X86-NEXT: seta %bh
+; GISEL-X86-NEXT: sete %cl
+; GISEL-X86-NEXT: testl %ecx, %ecx
+; GISEL-X86-NEXT: je .LBB3_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movb %bl, %bh
+; GISEL-X86-NEXT: .LBB3_2:
+; GISEL-X86-NEXT: movzbl %bh, %edi
+; GISEL-X86-NEXT: andl $1, %edi
+; GISEL-X86-NEXT: je .LBB3_4
+; GISEL-X86-NEXT: # %bb.3:
+; GISEL-X86-NEXT: movl %esi, %eax
+; GISEL-X86-NEXT: .LBB3_4:
+; GISEL-X86-NEXT: testl %edi, %edi
+; GISEL-X86-NEXT: je .LBB3_6
+; GISEL-X86-NEXT: # %bb.5:
+; GISEL-X86-NEXT: movl %ebp, %edx
+; GISEL-X86-NEXT: .LBB3_6:
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: popl %edi
+; GISEL-X86-NEXT: popl %ebx
+; GISEL-X86-NEXT: popl %ebp
+; GISEL-X86-NEXT: retl
%ret = call i64 @llvm.umax.i64(i64 %a, i64 %b)
ret i64 %ret
}
diff --git a/llvm/test/CodeGen/X86/isel-umin.ll b/llvm/test/CodeGen/X86/isel-umin.ll
index 53a0b27..1710b9f 100644
--- a/llvm/test/CodeGen/X86/isel-umin.ll
+++ b/llvm/test/CodeGen/X86/isel-umin.ll
@@ -1,19 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86,FASTISEL-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
define i8 @umin_i8(i8 %a, i8 %b) nounwind readnone {
-; X64-LABEL: umin_i8:
-; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpb %al, %dil
-; X64-NEXT: cmovbl %edi, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
+; DAG-X64-LABEL: umin_i8:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: movl %esi, %eax
+; DAG-X64-NEXT: cmpb %al, %dil
+; DAG-X64-NEXT: cmovbl %edi, %eax
+; DAG-X64-NEXT: # kill: def $al killed $al killed $eax
+; DAG-X64-NEXT: retq
;
; FASTISEL-X64-LABEL: umin_i8:
; FASTISEL-X64: # %bb.0:
@@ -24,6 +24,17 @@ define i8 @umin_i8(i8 %a, i8 %b) nounwind readnone {
; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax
; FASTISEL-X64-NEXT: retq
;
+; GISEL-X64-LABEL: umin_i8:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %esi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpb %al, %dil
+; GISEL-X64-NEXT: setb %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovnew %di, %ax
+; GISEL-X64-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X64-NEXT: retq
+;
; X86-LABEL: umin_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
@@ -35,16 +46,20 @@ define i8 @umin_i8(i8 %a, i8 %b) nounwind readnone {
; X86-NEXT: .LBB0_2:
; X86-NEXT: retl
;
-; FASTISEL-X86-LABEL: umin_i8:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpb %cl, %al
-; FASTISEL-X86-NEXT: jb .LBB0_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB0_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: umin_i8:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpb %al, %cl
+; GISEL-X86-NEXT: setb %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB0_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB0_2:
+; GISEL-X86-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X86-NEXT: retl
%ret = call i8 @llvm.umin.i8(i8 %a, i8 %b)
ret i8 %ret
}
@@ -57,25 +72,28 @@ define i16 @umin_i16(i16 %a, i16 %b) nounwind readnone {
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: umin_i16:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpw %ax, %di
-; FASTISEL-X64-NEXT: cmovbl %edi, %eax
-; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: umin_i16:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpw %si, %ax
+; GISEL-X64-NEXT: setb %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovew %si, %ax
+; GISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X64-NEXT: retq
;
-; X86-LABEL: umin_i16:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpw %cx, %ax
-; X86-NEXT: jb .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB1_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
+; DAG-X86-LABEL: umin_i16:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: cmpw %cx, %ax
+; DAG-X86-NEXT: jb .LBB1_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: .LBB1_2:
+; DAG-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; DAG-X86-NEXT: retl
;
; FASTISEL-X86-LABEL: umin_i16:
; FASTISEL-X86: # %bb.0:
@@ -88,6 +106,21 @@ define i16 @umin_i16(i16 %a, i16 %b) nounwind readnone {
; FASTISEL-X86-NEXT: .LBB1_2:
; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: umin_i16:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpw %ax, %cx
+; GISEL-X86-NEXT: setb %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB1_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB1_2:
+; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X86-NEXT: retl
%ret = call i16 @llvm.umin.i16(i16 %a, i16 %b)
ret i16 %ret
}
@@ -99,12 +132,15 @@ define i32 @umin_i32(i32 %a, i32 %b) nounwind readnone {
; X64-NEXT: cmovbl %edi, %eax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: umin_i32:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpl %esi, %edi
-; FASTISEL-X64-NEXT: cmovbl %edi, %eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: umin_i32:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpl %esi, %edi
+; GISEL-X64-NEXT: setb %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovel %esi, %eax
+; GISEL-X64-NEXT: retq
;
; X86-LABEL: umin_i32:
; X86: # %bb.0:
@@ -117,16 +153,19 @@ define i32 @umin_i32(i32 %a, i32 %b) nounwind readnone {
; X86-NEXT: .LBB2_2:
; X86-NEXT: retl
;
-; FASTISEL-X86-LABEL: umin_i32:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpl %ecx, %eax
-; FASTISEL-X86-NEXT: jb .LBB2_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB2_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: umin_i32:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpl %eax, %ecx
+; GISEL-X86-NEXT: setb %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB2_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB2_2:
+; GISEL-X86-NEXT: retl
%ret = call i32 @llvm.umin.i32(i32 %a, i32 %b)
ret i32 %ret
}
@@ -138,32 +177,35 @@ define i64 @umin_i64(i64 %a, i64 %b) nounwind readnone {
; X64-NEXT: cmovbq %rdi, %rax
; X64-NEXT: retq
;
-; FASTISEL-X64-LABEL: umin_i64:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movq %rsi, %rax
-; FASTISEL-X64-NEXT: cmpq %rsi, %rdi
-; FASTISEL-X64-NEXT: cmovbq %rdi, %rax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: umin_i64:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movq %rdi, %rax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpq %rsi, %rdi
+; GISEL-X64-NEXT: setb %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmoveq %rsi, %rax
+; GISEL-X64-NEXT: retq
;
-; X86-LABEL: umin_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: jb .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: .LBB3_2:
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: retl
+; DAG-X86-LABEL: umin_i64:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: pushl %edi
+; DAG-X86-NEXT: pushl %esi
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; DAG-X86-NEXT: cmpl %ecx, %eax
+; DAG-X86-NEXT: movl %edx, %edi
+; DAG-X86-NEXT: sbbl %esi, %edi
+; DAG-X86-NEXT: jb .LBB3_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: movl %esi, %edx
+; DAG-X86-NEXT: .LBB3_2:
+; DAG-X86-NEXT: popl %esi
+; DAG-X86-NEXT: popl %edi
+; DAG-X86-NEXT: retl
;
; FASTISEL-X86-LABEL: umin_i64:
; FASTISEL-X86: # %bb.0:
@@ -184,6 +226,44 @@ define i64 @umin_i64(i64 %a, i64 %b) nounwind readnone {
; FASTISEL-X86-NEXT: popl %esi
; FASTISEL-X86-NEXT: popl %edi
; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: umin_i64:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: pushl %ebp
+; GISEL-X86-NEXT: pushl %ebx
+; GISEL-X86-NEXT: pushl %edi
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: cmpl %eax, %esi
+; GISEL-X86-NEXT: setb %bl
+; GISEL-X86-NEXT: xorl %ecx, %ecx
+; GISEL-X86-NEXT: cmpl %edx, %ebp
+; GISEL-X86-NEXT: setb %bh
+; GISEL-X86-NEXT: sete %cl
+; GISEL-X86-NEXT: testl %ecx, %ecx
+; GISEL-X86-NEXT: je .LBB3_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movb %bl, %bh
+; GISEL-X86-NEXT: .LBB3_2:
+; GISEL-X86-NEXT: movzbl %bh, %edi
+; GISEL-X86-NEXT: andl $1, %edi
+; GISEL-X86-NEXT: je .LBB3_4
+; GISEL-X86-NEXT: # %bb.3:
+; GISEL-X86-NEXT: movl %esi, %eax
+; GISEL-X86-NEXT: .LBB3_4:
+; GISEL-X86-NEXT: testl %edi, %edi
+; GISEL-X86-NEXT: je .LBB3_6
+; GISEL-X86-NEXT: # %bb.5:
+; GISEL-X86-NEXT: movl %ebp, %edx
+; GISEL-X86-NEXT: .LBB3_6:
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: popl %edi
+; GISEL-X86-NEXT: popl %ebx
+; GISEL-X86-NEXT: popl %ebp
+; GISEL-X86-NEXT: retl
%ret = call i64 @llvm.umin.i64(i64 %a, i64 %b)
ret i64 %ret
}
diff --git a/llvm/test/CodeGen/X86/logic-shift.ll b/llvm/test/CodeGen/X86/logic-shift.ll
index 96e63d1..104151c 100644
--- a/llvm/test/CodeGen/X86/logic-shift.ll
+++ b/llvm/test/CodeGen/X86/logic-shift.ll
@@ -129,10 +129,10 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <
; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm6
+; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpsllw $2, %xmm4, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -140,10 +140,10 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <
; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpsllw $2, %xmm2, %xmm2
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
@@ -413,10 +413,10 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm6
+; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpsllw $2, %xmm4, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -424,10 +424,10 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpsllw $2, %xmm2, %xmm2
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
@@ -697,10 +697,10 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm6
+; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpsllw $2, %xmm4, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -708,10 +708,10 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpsllw $2, %xmm2, %xmm2
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
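A pattern worth noting across the logic-shift.ll hunks above and the vector-shift/fshr diffs below: the per-step vpblendvb masks are no longer produced by serially doubling a single register. The first doubled mask now lands in a fresh register, and later masks are shifted directly from the original, e.g. vpsllw $2 (or vpsllw $3 where the mask is needed three steps later), which shortens the dependency chain. Excerpting the hunks above:

  Before (each mask update depends on the previous one):
    vpaddw    %xmm4, %xmm4, %xmm4        ; mask <<= 1
    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
    vpaddw    %xmm4, %xmm4, %xmm4        ; mask <<= 1 again, serially
    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1

  After (each step's mask derives straight from the original):
    vpaddw    %xmm4, %xmm4, %xmm6        ; mask << 1 into a fresh register
    vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
    vpsllw    $2, %xmm4, %xmm4           ; mask << 2 computed from the original
    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1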
diff --git a/llvm/test/CodeGen/X86/pr161693.ll b/llvm/test/CodeGen/X86/pr161693.ll
new file mode 100644
index 0000000..de8188f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr161693.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
+
+define void @PR161693() #0 {
+; CHECK-LABEL: PR161693:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: movzbl (%rax), %eax
+; CHECK-NEXT: andb $-33, %al
+; CHECK-NEXT: addb $-71, %al
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %loop
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpb $-6, %al
+; CHECK-NEXT: setb %cl
+; CHECK-NEXT: leal (%rcx,%rcx), %edx
+; CHECK-NEXT: orb %cl, %dl
+; CHECK-NEXT: leal (,%rdx,4), %ecx
+; CHECK-NEXT: orb %dl, %cl
+; CHECK-NEXT: je .LBB0_1
+; CHECK-NEXT: # %bb.2: # %exit
+; CHECK-NEXT: retq
+start:
+ br label %loop
+
+loop:
+ %.val.i.i89 = load <16 x i8>, ptr poison, align 1
+ %.not49.i = icmp ult <16 x i8> zeroinitializer, splat (i8 -10)
+ %i = and <16 x i8> %.val.i.i89, splat (i8 -33)
+ %i1 = add <16 x i8> %i, splat (i8 -71)
+ %.not51.i = icmp ult <16 x i8> %i1, splat (i8 -6)
+ %.not46.i = and <16 x i1> %.not49.i, %.not51.i
+ %i2 = bitcast <16 x i1> %.not46.i to i16
+ %_0.i = icmp eq i16 %i2, 0
+ br i1 %_0.i, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+attributes #0 = { "target-features"="+soft-float" }
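The new pr161693.ll test pins down the lowering, under "+soft-float", of a <16 x i1> mask built from icmp ult results, ANDed together, and bitcast to i16: the CHECK lines verify it stays scalarized as byte compares and OR-reductions instead of using vector registers. The bitcast semantics the test relies on (restated from LangRef, not from the patch):

  %i2 = bitcast <16 x i1> %m to i16   ; bit k of %i2 is lane k of %m
  %z  = icmp eq i16 %i2, 0            ; true iff every lane of %m is false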
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-shift.ll b/llvm/test/CodeGen/X86/prefer-avx256-shift.ll
index bf04c8d..63bbac12 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-shift.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-shift.ll
@@ -302,10 +302,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX256-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX256-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX256-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX256-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX256-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX256-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX256-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; AVX256-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX256-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX256-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX256-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX256-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX256-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -313,10 +313,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX256-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX256-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX256-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX256-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX256-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX256-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX256-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX256-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX256-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX256-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX256-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX256-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX256-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -338,10 +338,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX512VL-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -349,10 +349,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX512VL-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -432,10 +432,10 @@ define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX256VL-NEXT: vpsraw $4, %xmm3, %xmm4
; AVX256VL-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
; AVX256VL-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX256VL-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX256VL-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX256VL-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; AVX256VL-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; AVX256VL-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX256VL-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX256VL-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX256VL-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX256VL-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX256VL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -443,10 +443,10 @@ define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX256VL-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX256VL-NEXT: vpsraw $2, %xmm0, %xmm3
-; AVX256VL-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX256VL-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX256VL-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; AVX256VL-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX256VL-NEXT: vpsraw $1, %xmm0, %xmm3
-; AVX256VL-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX256VL-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX256VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX256VL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
index 9c8729b3..4b8f78d 100644
--- a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
+++ b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
@@ -15,20 +15,20 @@ define <4 x i32> @shuf_rot_v4i32_1032(<4 x i32> %x) {
;
; CHECK-ICX-LABEL: shuf_rot_v4i32_1032:
; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: shuf_rot_v4i32_1032:
; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-V4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-V4-NEXT: retq
;
; CHECK-ZNVER4-LABEL: shuf_rot_v4i32_1032:
; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ZNVER4-NEXT: retq
%x1 = add <4 x i32> %x, %x
%r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
@@ -44,20 +44,20 @@ define <8 x i32> @shuf_rot_v8i32_10325476(<8 x i32> %x) {
;
; CHECK-ICX-LABEL: shuf_rot_v8i32_10325476:
; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: shuf_rot_v8i32_10325476:
; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-V4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-V4-NEXT: retq
;
; CHECK-ZNVER4-LABEL: shuf_rot_v8i32_10325476:
; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ZNVER4-NEXT: retq
%x1 = add <8 x i32> %x, %x
%r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
@@ -73,20 +73,20 @@ define <16 x i32> @shuf_rot_v16i32_1032547698111013121514(<16 x i32> %x) {
;
; CHECK-ICX-LABEL: shuf_rot_v16i32_1032547698111013121514:
; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: shuf_rot_v16i32_1032547698111013121514:
; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-V4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-V4-NEXT: retq
;
; CHECK-ZNVER4-LABEL: shuf_rot_v16i32_1032547698111013121514:
; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ZNVER4-NEXT: retq
%x1 = add <16 x i32> %x, %x
%r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -168,20 +168,20 @@ define <4 x i32> @shuf_shr_v4i32_1U3U(<4 x i32> %x) {
;
; CHECK-ICX-LABEL: shuf_shr_v4i32_1U3U:
; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: shuf_shr_v4i32_1U3U:
; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-V4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-V4-NEXT: retq
;
; CHECK-ZNVER4-LABEL: shuf_shr_v4i32_1U3U:
; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ZNVER4-NEXT: retq
%x1 = add <4 x i32> %x, %x
%r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
@@ -197,20 +197,20 @@ define <8 x i32> @shuf_shr_v8i32_1U3U5U7U(<8 x i32> %x) {
;
; CHECK-ICX-LABEL: shuf_shr_v8i32_1U3U5U7U:
; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: shuf_shr_v8i32_1U3U5U7U:
; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-V4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-V4-NEXT: retq
;
; CHECK-ZNVER4-LABEL: shuf_shr_v8i32_1U3U5U7U:
; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ZNVER4-NEXT: retq
%x1 = add <8 x i32> %x, %x
%r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef>
@@ -226,20 +226,20 @@ define <16 x i32> @shuf_shr_v16i32_U3U5U7U9U11U13U15(<16 x i32> %x) {
;
; CHECK-ICX-LABEL: shuf_shr_v16i32_U3U5U7U9U11U13U15:
; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: shuf_shr_v16i32_U3U5U7U9U11U13U15:
; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-V4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-V4-NEXT: retq
;
; CHECK-ZNVER4-LABEL: shuf_shr_v16i32_U3U5U7U9U11U13U15:
; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ZNVER4-NEXT: retq
%x1 = add <16 x i32> %x, %x
%r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 11, i32 undef, i32 13, i32 undef, i32 15, i32 undef>
@@ -288,20 +288,20 @@ define <4 x i32> @shuf_shl_v4i32_U0U2(<4 x i32> %x) {
;
; CHECK-ICX-LABEL: shuf_shl_v4i32_U0U2:
; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: shuf_shl_v4i32_U0U2:
; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-V4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-V4-NEXT: retq
;
; CHECK-ZNVER4-LABEL: shuf_shl_v4i32_U0U2:
; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-ZNVER4-NEXT: retq
%x1 = add <4 x i32> %x, %x
%r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32> <i32 undef, i32 0, i32 undef, i32 2>
@@ -317,20 +317,20 @@ define <8 x i32> @shuf_shl_v8i32_U0U2U4U6(<8 x i32> %x) {
;
; CHECK-ICX-LABEL: shuf_shl_v8i32_U0U2U4U6:
; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: shuf_shl_v8i32_U0U2U4U6:
; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-V4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-V4-NEXT: retq
;
; CHECK-ZNVER4-LABEL: shuf_shl_v8i32_U0U2U4U6:
; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-ZNVER4-NEXT: retq
%x1 = add <8 x i32> %x, %x
%r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 undef, i32 4, i32 undef, i32 6>
@@ -346,20 +346,20 @@ define <16 x i32> @shuf_shl_v16i32_U0U2U4U6U8U10U12U14(<16 x i32> %x) {
;
; CHECK-ICX-LABEL: shuf_shl_v16i32_U0U2U4U6U8U10U12U14:
; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ICX-NEXT: retq
;
; CHECK-V4-LABEL: shuf_shl_v16i32_U0U2U4U6U8U10U12U14:
; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-V4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-V4-NEXT: retq
;
; CHECK-ZNVER4-LABEL: shuf_shl_v16i32_U0U2U4U6U8U10U12U14:
; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; CHECK-ZNVER4-NEXT: retq
%x1 = add <16 x i32> %x, %x
%r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 undef, i32 4, i32 undef, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 undef, i32 12, i32 undef, i32 14>
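The shuffle-as-shifts.ll hunks above only reorder vpaddd after vpshufd. This is sound because an elementwise add commutes with a lane permutation: since the test's %x1 = add %x, %x feeds the shufflevector, shuffling first and doubling afterwards yields the same vector. A hedged IR sketch of the identity (not taken from the patch):

  %t  = add <4 x i32> %x, %x
  %r1 = shufflevector <4 x i32> %t, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ; computes the same result as
  %s  = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %r2 = add <4 x i32> %s, %s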
diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
index f91758b..10dee14 100644
--- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
@@ -602,10 +602,10 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; X64-AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; X64-AVX2-NEXT: vpsraw $2, %xmm3, %xmm4
-; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm5
-; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm6
+; X64-AVX2-NEXT: vpblendvb %xmm6, %xmm4, %xmm3, %xmm3
; X64-AVX2-NEXT: vpsraw $1, %xmm3, %xmm4
-; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm5
+; X64-AVX2-NEXT: vpsllw $2, %xmm5, %xmm5
; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; X64-AVX2-NEXT: vpsrlw $8, %xmm3, %xmm3
; X64-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -613,10 +613,10 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; X64-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsraw $2, %xmm4, %xmm5
-; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm4
+; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm6
+; X64-AVX2-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsraw $1, %xmm4, %xmm5
-; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpsllw $2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm1
; X64-AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
; X64-AVX2-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index 20be579..9b52857 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -536,14 +536,14 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm6
; AVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4
-; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm4
-; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm4
-; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm6
+; AVX1-NEXT: vpblendvb %xmm5, %xmm6, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm5
+; AVX1-NEXT: vpsllw $2, %xmm4, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm5
+; AVX1-NEXT: vpsllw $3, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpslld $23, %xmm3, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index 1f16463..a387562 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -328,15 +328,15 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm6
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
; AVX1-NEXT: vpsrlw $8, %xmm7, %xmm8
-; AVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm5
-; AVX1-NEXT: vpsrlw $4, %xmm5, %xmm7
-; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5
-; AVX1-NEXT: vpsrlw $2, %xmm5, %xmm7
-; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6
-; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5
-; AVX1-NEXT: vpsrlw $1, %xmm5, %xmm7
-; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6
-; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm7
+; AVX1-NEXT: vpsrlw $4, %xmm7, %xmm8
+; AVX1-NEXT: vpblendvb %xmm6, %xmm8, %xmm7, %xmm6
+; AVX1-NEXT: vpsrlw $2, %xmm6, %xmm7
+; AVX1-NEXT: vpsllw $2, %xmm5, %xmm8
+; AVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm6, %xmm6
+; AVX1-NEXT: vpsrlw $1, %xmm6, %xmm7
+; AVX1-NEXT: vpsllw $3, %xmm5, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm7, %xmm6, %xmm5
; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm6
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm6[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpslld $23, %xmm4, %xmm7
@@ -358,14 +358,14 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm7
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm8
; AVX1-NEXT: vpblendvb %xmm6, %xmm8, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm6
-; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm6
-; AVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7
-; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm6
-; AVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7
-; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm8
+; AVX1-NEXT: vpblendvb %xmm7, %xmm8, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm7
+; AVX1-NEXT: vpsllw $2, %xmm6, %xmm8
+; AVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm7
+; AVX1-NEXT: vpsllw $3, %xmm6, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpslld $23, %xmm3, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 02f0f53..d565ef0 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -293,14 +293,14 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v8i16:
@@ -494,10 +494,10 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm3, %xmm4
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; AVX-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -505,10 +505,10 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $2, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; AVX-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $1, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 15855e3..249bcba 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -237,29 +237,29 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsraw $8, %xmm4, %xmm5
-; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
-; AVX1-NEXT: vpsraw $4, %xmm2, %xmm4
-; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpsraw $2, %xmm2, %xmm4
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpsraw $1, %xmm2, %xmm4
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
+; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
+; AVX1-NEXT: vpsraw $2, %xmm3, %xmm4
+; AVX1-NEXT: vpsllw $2, %xmm2, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsraw $1, %xmm3, %xmm4
+; AVX1-NEXT: vpsllw $3, %xmm2, %xmm2
+; AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX1-NEXT: vpsllw $12, %xmm1, %xmm3
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm4
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $4, %xmm0, %xmm4
+; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $2, %xmm0, %xmm3
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $1, %xmm0, %xmm3
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -339,29 +339,29 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpsraw $8, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
-; X86-AVX1-NEXT: vpsraw $4, %xmm2, %xmm4
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpsraw $2, %xmm2, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpsraw $1, %xmm2, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
+; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
+; X86-AVX1-NEXT: vpsraw $2, %xmm3, %xmm4
+; X86-AVX1-NEXT: vpsllw $2, %xmm2, %xmm5
+; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsraw $1, %xmm3, %xmm4
+; X86-AVX1-NEXT: vpsllw $3, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; X86-AVX1-NEXT: vpsllw $12, %xmm1, %xmm3
; X86-AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; X86-AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3
; X86-AVX1-NEXT: vpsraw $8, %xmm0, %xmm4
; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsraw $4, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsraw $4, %xmm0, %xmm4
+; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsraw $2, %xmm0, %xmm3
+; X86-AVX1-NEXT: vpsllw $2, %xmm1, %xmm4
+; X86-AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsraw $1, %xmm0, %xmm3
+; X86-AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
@@ -393,10 +393,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vpsraw $4, %xmm5, %xmm6
; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpsraw $2, %xmm5, %xmm6
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm7
+; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -404,10 +404,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
@@ -417,10 +417,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -428,10 +428,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vpsraw $4, %xmm0, %xmm4
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm4
-; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm4
-; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
@@ -446,10 +446,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX2-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -457,10 +457,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX2-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX2-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -498,10 +498,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -509,10 +509,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -534,10 +534,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQVL-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512DQVL-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX512DQVL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512DQVL-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX512DQVL-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; AVX512DQVL-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX512DQVL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX512DQVL-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -545,10 +545,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQVL-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX512DQVL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX512DQVL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX512DQVL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQVL-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -572,10 +572,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX1-NEXT: vpsraw $4, %xmm5, %xmm6
; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
; X86-AVX1-NEXT: vpsraw $2, %xmm5, %xmm6
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm7
+; X86-AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
; X86-AVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
; X86-AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; X86-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -583,10 +583,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
; X86-AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm6
+; X86-AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; X86-AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsllw $2, %xmm2, %xmm2
; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
; X86-AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; X86-AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
@@ -596,10 +596,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
; X86-AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm6
+; X86-AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; X86-AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
; X86-AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; X86-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -607,10 +607,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX1-NEXT: vpsraw $4, %xmm0, %xmm4
; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsraw $2, %xmm0, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm5
+; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsraw $1, %xmm0, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpsllw $2, %xmm1, %xmm1
; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; X86-AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
@@ -625,10 +625,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX2-NEXT: vpsraw $4, %ymm3, %ymm4
; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; X86-AVX2-NEXT: vpsraw $2, %ymm3, %ymm4
-; X86-AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; X86-AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; X86-AVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; X86-AVX2-NEXT: vpsraw $1, %ymm3, %ymm4
-; X86-AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsllw $2, %ymm2, %ymm2
; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; X86-AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; X86-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -636,10 +636,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX2-NEXT: vpsraw $4, %ymm0, %ymm3
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsraw $2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; X86-AVX2-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsraw $1, %ymm0, %ymm3
-; X86-AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; X86-AVX2-NEXT: vpsllw $2, %ymm1, %ymm1
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; X86-AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index ea0745b..0fb0420 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -59,10 +59,10 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm6
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
; AVX512DQ-NEXT: vpsraw $2, %ymm5, %ymm6
-; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm7
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm6, %ymm5, %ymm5
; AVX512DQ-NEXT: vpsraw $1, %ymm5, %ymm6
-; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsllw $2, %ymm3, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm3
; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -70,10 +70,10 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpsraw $2, %ymm4, %ymm5
-; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpsraw $1, %ymm4, %ymm5
-; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm2
; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm2, %ymm2
@@ -83,10 +83,10 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpsraw $2, %ymm4, %ymm5
-; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpsraw $1, %ymm4, %ymm5
-; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsllw $2, %ymm3, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -94,10 +94,10 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm4
-; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm5, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm4
-; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
@@ -113,11 +113,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3
-; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
-; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm5
+; AVX512BW-NEXT: vpmovb2m %zmm5, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3
-; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; AVX512BW-NEXT: vpsllw $2, %zmm4, %zmm4
; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
@@ -127,11 +127,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm4
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsllw $2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
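The AVX512BW version of the same idea predicates with k-masks instead of vpblendvb: vpmovb2m copies each byte's sign bit into a mask register, and the masked vmovdqu8 performs the conditional step. The amount-advance rewrite is identical in spirit: the first doubling stays a vpaddw but targets a fresh register, and the second becomes vpsllw $2 of the original amount, so the two vpmovb2m inputs are computed independently rather than back to back. A reproducer for the function in the @@ headers, again assuming the usual one-line body:

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
  ; Byte lanes are widened to words in pairs (vpunpck{l,h}bw), shifted,
  ; then narrowed back with vpsrlw $8 plus a pack.
  %shift = ashr <64 x i8> %a, %b
  ret <64 x i8> %shift
}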
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index f7de8d4..c5d3297 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -196,14 +196,14 @@ define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v4i16:
@@ -367,14 +367,14 @@ define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v2i16:
@@ -568,10 +568,10 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm3, %xmm4
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; AVX-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -579,10 +579,10 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $2, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; AVX-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $1, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
@@ -796,10 +796,10 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm3, %xmm4
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; AVX-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -807,10 +807,10 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $2, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; AVX-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $1, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
@@ -1024,10 +1024,10 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm3, %xmm4
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; AVX-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -1035,10 +1035,10 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $2, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; AVX-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $1, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index 1d1697a..8cb2c7b 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -262,14 +262,14 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v8i16:
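The lshr files exercise the same selection network with logical steps (vpsrlw $8/$4/$2/$1) in place of the arithmetic vpsraw steps, and they pick up the identical vpsllw $2 / vpsllw $3 mask formation. A reproducer for the function named in the @@ header, with the body assumed to follow the same pattern as the other shift tests:

define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
  ; Same network as the ashr case, but with logical right shifts.
  %shift = lshr <8 x i16> %a, %b
  ret <8 x i16> %shift
}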
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 3a4bb22..606adb4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -198,29 +198,29 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm5
-; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm4
-; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm4
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm4
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpsrlw $4, %xmm4, %xmm5
+; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $2, %xmm3, %xmm4
+; AVX1-NEXT: vpsllw $2, %xmm2, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm4
+; AVX1-NEXT: vpsllw $3, %xmm2, %xmm2
+; AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX1-NEXT: vpsllw $12, %xmm1, %xmm3
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
+; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -300,29 +300,29 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpsrlw $8, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
-; X86-AVX1-NEXT: vpsrlw $4, %xmm2, %xmm4
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpsrlw $2, %xmm2, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpsrlw $1, %xmm2, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpsrlw $4, %xmm4, %xmm5
+; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
+; X86-AVX1-NEXT: vpsrlw $2, %xmm3, %xmm4
+; X86-AVX1-NEXT: vpsllw $2, %xmm2, %xmm5
+; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsrlw $1, %xmm3, %xmm4
+; X86-AVX1-NEXT: vpsllw $3, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; X86-AVX1-NEXT: vpsllw $12, %xmm1, %xmm3
; X86-AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; X86-AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4
; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
+; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3
+; X86-AVX1-NEXT: vpsllw $2, %xmm1, %xmm4
+; X86-AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
+; X86-AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
index 7928111..57874c4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -196,14 +196,14 @@ define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v4i16:
@@ -367,14 +367,14 @@ define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v2i16:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index dbbfaab..be41945 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -8079,14 +8079,14 @@ define <16 x i16> @pr43230(<16 x i16> %a, <16 x i16> %b) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq