Diffstat (limited to 'llvm/test/CodeGen/X86')
22 files changed, 1208 insertions, 860 deletions
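The hunks below show two kinds of autogenerated check updates. In the vector-shift tests (avx2-vector-shifts.ll, gfni-shifts.ll), the second doubling of the per-element shift-amount vector (a second vpaddw of a register with itself) is replaced by a single vpsllw $2 of the original amount, so the first doubling can write to a fresh register instead of clobbering its source. In the isel tests (isel-fpclass.ll, isel-smax.ll, isel-smin.ll, isel-umax.ll), GlobalISel RUN lines are added and the FileCheck prefixes are regrouped. As a minimal sketch of what the var_ashr tests contain (the IR bodies are not visible in the hunks, so the exact form is an assumption inferred from the test names), each reduces to a single variable arithmetic shift:

define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
  ; assumed body: lowers to the vpsraw/vpblendvb ladder checked below
  %shift = ashr <16 x i8> %a, %b
  ret <16 x i8> %shift
}

Since x86 has no per-element byte shift, this lowers to the checked ladder: unpack to words, shift by 4, 2, and 1 with vpsraw, and select each partial result with vpblendvb keyed off successive bits of the progressively doubled shift amount.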
diff --git a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
index 983c69d..95c2eda 100644
--- a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -441,10 +441,10 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
 ; CHECK-NEXT: vpsraw $4, %ymm3, %ymm4
 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
 ; CHECK-NEXT: vpsraw $2, %ymm3, %ymm4
-; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; CHECK-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
 ; CHECK-NEXT: vpsraw $1, %ymm3, %ymm4
-; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpsllw $2, %ymm2, %ymm2
 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
 ; CHECK-NEXT: vpsrlw $8, %ymm2, %ymm2
 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -452,10 +452,10 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
 ; CHECK-NEXT: vpsraw $4, %ymm0, %ymm3
 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; CHECK-NEXT: vpsraw $2, %ymm0, %ymm3
-; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; CHECK-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
 ; CHECK-NEXT: vpsraw $1, %ymm0, %ymm3
-; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vpsllw $2, %ymm1, %ymm1
 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/gfni-shifts.ll b/llvm/test/CodeGen/X86/gfni-shifts.ll
index cd16651..feac3dc 100644
--- a/llvm/test/CodeGen/X86/gfni-shifts.ll
+++ b/llvm/test/CodeGen/X86/gfni-shifts.ll
@@ -166,10 +166,10 @@ define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; GFNIAVX1OR2-NEXT: vpsraw $4, %xmm3, %xmm4
 ; GFNIAVX1OR2-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
 ; GFNIAVX1OR2-NEXT: vpsraw $2, %xmm3, %xmm4
-; GFNIAVX1OR2-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; GFNIAVX1OR2-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; GFNIAVX1OR2-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; GFNIAVX1OR2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
 ; GFNIAVX1OR2-NEXT: vpsraw $1, %xmm3, %xmm4
-; GFNIAVX1OR2-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; GFNIAVX1OR2-NEXT: vpsllw $2, %xmm2, %xmm2
 ; GFNIAVX1OR2-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
 ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; GFNIAVX1OR2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -177,10 +177,10 @@ define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; GFNIAVX1OR2-NEXT: vpsraw $4, %xmm0, %xmm3
 ; GFNIAVX1OR2-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
 ; GFNIAVX1OR2-NEXT: vpsraw $2, %xmm0, %xmm3
-; GFNIAVX1OR2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; GFNIAVX1OR2-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; GFNIAVX1OR2-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; GFNIAVX1OR2-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
 ; GFNIAVX1OR2-NEXT: vpsraw $1, %xmm0, %xmm3
-; GFNIAVX1OR2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; GFNIAVX1OR2-NEXT: vpsllw $2, %xmm1, %xmm1
 ; GFNIAVX1OR2-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
 ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm0
 ; GFNIAVX1OR2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
@@ -896,10 +896,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm5, %xmm6
 ; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm7
+; GFNIAVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpsllw $2, %xmm3, %xmm3
 ; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
 ; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -907,10 +907,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm4, %xmm5
 ; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm6
+; GFNIAVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpsllw $2, %xmm2, %xmm2
 ; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; GFNIAVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
@@ -920,10 +920,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm4, %xmm5
 ; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm6
+; GFNIAVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpsllw $2, %xmm3, %xmm3
 ; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
 ; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -931,10 +931,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm0, %xmm4
 ; GFNIAVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm0, %xmm4
-; GFNIAVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; GFNIAVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm5
+; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm0, %xmm4
-; GFNIAVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; GFNIAVX1-NEXT: vpsllw $2, %xmm1, %xmm1
 ; GFNIAVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
 ; GFNIAVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
@@ -949,10 +949,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; GFNIAVX2-NEXT: vpsraw $4, %ymm3, %ymm4
 ; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
 ; GFNIAVX2-NEXT: vpsraw $2, %ymm3, %ymm4
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; GFNIAVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
 ; GFNIAVX2-NEXT: vpsraw $1, %ymm3, %ymm4
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX2-NEXT: vpsllw $2, %ymm2, %ymm2
 ; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
 ; GFNIAVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -960,10 +960,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; GFNIAVX2-NEXT: vpsraw $4, %ymm0, %ymm3
 ; GFNIAVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; GFNIAVX2-NEXT: vpsraw $2, %ymm0, %ymm3
-; GFNIAVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; GFNIAVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; GFNIAVX2-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
 ; GFNIAVX2-NEXT: vpsraw $1, %ymm0, %ymm3
-; GFNIAVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; GFNIAVX2-NEXT: vpsllw $2, %ymm1, %ymm1
 ; GFNIAVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; GFNIAVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -977,10 +977,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; GFNIAVX512VL-NEXT: vpsraw $4, %ymm3, %ymm4
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
 ; GFNIAVX512VL-NEXT: vpsraw $2, %ymm3, %ymm4
-; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; GFNIAVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
 ; GFNIAVX512VL-NEXT: vpsraw $1, %ymm3, %ymm4
-; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm2, %ymm2
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
 ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -988,10 +988,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; GFNIAVX512VL-NEXT: vpsraw $4, %ymm0, %ymm3
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; GFNIAVX512VL-NEXT: vpsraw $2, %ymm0, %ymm3
-; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; GFNIAVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
 ; GFNIAVX512VL-NEXT: vpsraw $1, %ymm0, %ymm3
-; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm1, %ymm1
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; GFNIAVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -2027,10 +2027,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm7, %xmm8
 ; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm7
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm7, %xmm8
-; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
-; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm7
+; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm9
+; GFNIAVX1-NEXT: vpblendvb %xmm9, %xmm8, %xmm7, %xmm7
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm7, %xmm8
-; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpsllw $2, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm5
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2038,10 +2038,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm6, %xmm7
 ; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm6
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm6
+; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm8
+; GFNIAVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm6, %xmm6
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpsllw $2, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm4
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4
@@ -2051,10 +2051,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm6, %xmm7
 ; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm7, %xmm6, %xmm6
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
-; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm7, %xmm6, %xmm6
+; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm8
+; GFNIAVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm6, %xmm6
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpsllw $2, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpblendvb %xmm5, %xmm7, %xmm6, %xmm5
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2062,10 +2062,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm0, %xmm6
 ; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm0, %xmm0
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm0, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm0, %xmm0
+; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm7
+; GFNIAVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm0, %xmm0
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm0, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpsllw $2, %xmm2, %xmm2
 ; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm0, %xmm0
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
 ; GFNIAVX1-NEXT: vpackuswb %xmm5, %xmm0, %xmm0
@@ -2078,10 +2078,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm6, %xmm7
 ; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm6
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm6
+; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm8
+; GFNIAVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm6, %xmm6
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm6, %xmm7
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpsllw $2, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm7, %xmm6, %xmm4
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2089,10 +2089,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm5, %xmm6
 ; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm7
+; GFNIAVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vpsllw $2, %xmm2, %xmm2
 ; GFNIAVX1-NEXT: vpblendvb %xmm2, %xmm6, %xmm5, %xmm2
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; GFNIAVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
@@ -2102,10 +2102,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm5, %xmm6
 ; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm5, %xmm5
+; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm7
+; GFNIAVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; GFNIAVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; GFNIAVX1-NEXT: vpsllw $2, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm5, %xmm4
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
 ; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2113,10 +2113,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX1-NEXT: vpsraw $4, %xmm1, %xmm5
 ; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm1, %xmm1
 ; GFNIAVX1-NEXT: vpsraw $2, %xmm1, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm1, %xmm1
+; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm6
+; GFNIAVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
 ; GFNIAVX1-NEXT: vpsraw $1, %xmm1, %xmm5
-; GFNIAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; GFNIAVX1-NEXT: vpsllw $2, %xmm3, %xmm3
 ; GFNIAVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm1, %xmm1
 ; GFNIAVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; GFNIAVX1-NEXT: vpackuswb %xmm4, %xmm1, %xmm1
@@ -2131,10 +2131,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX2-NEXT: vpsraw $4, %ymm5, %ymm6
 ; GFNIAVX2-NEXT: vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
 ; GFNIAVX2-NEXT: vpsraw $2, %ymm5, %ymm6
-; GFNIAVX2-NEXT: vpaddw %ymm4, %ymm4, %ymm4
-; GFNIAVX2-NEXT: vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
+; GFNIAVX2-NEXT: vpaddw %ymm4, %ymm4, %ymm7
+; GFNIAVX2-NEXT: vpblendvb %ymm7, %ymm6, %ymm5, %ymm5
 ; GFNIAVX2-NEXT: vpsraw $1, %ymm5, %ymm6
-; GFNIAVX2-NEXT: vpaddw %ymm4, %ymm4, %ymm4
+; GFNIAVX2-NEXT: vpsllw $2, %ymm4, %ymm4
 ; GFNIAVX2-NEXT: vpblendvb %ymm4, %ymm6, %ymm5, %ymm4
 ; GFNIAVX2-NEXT: vpsrlw $8, %ymm4, %ymm4
 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -2142,10 +2142,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX2-NEXT: vpsraw $4, %ymm0, %ymm5
 ; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
 ; GFNIAVX2-NEXT: vpsraw $2, %ymm0, %ymm5
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm6
+; GFNIAVX2-NEXT: vpblendvb %ymm6, %ymm5, %ymm0, %ymm0
 ; GFNIAVX2-NEXT: vpsraw $1, %ymm0, %ymm5
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX2-NEXT: vpsllw $2, %ymm2, %ymm2
 ; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
 ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; GFNIAVX2-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
@@ -2155,10 +2155,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX2-NEXT: vpsraw $4, %ymm4, %ymm5
 ; GFNIAVX2-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
 ; GFNIAVX2-NEXT: vpsraw $2, %ymm4, %ymm5
-; GFNIAVX2-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; GFNIAVX2-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; GFNIAVX2-NEXT: vpaddw %ymm3, %ymm3, %ymm6
+; GFNIAVX2-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
 ; GFNIAVX2-NEXT: vpsraw $1, %ymm4, %ymm5
-; GFNIAVX2-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; GFNIAVX2-NEXT: vpsllw $2, %ymm3, %ymm3
 ; GFNIAVX2-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
 ; GFNIAVX2-NEXT: vpsrlw $8, %ymm3, %ymm3
 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -2166,10 +2166,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX2-NEXT: vpsraw $4, %ymm1, %ymm4
 ; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
 ; GFNIAVX2-NEXT: vpsraw $2, %ymm1, %ymm4
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
+; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; GFNIAVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
 ; GFNIAVX2-NEXT: vpsraw $1, %ymm1, %ymm4
-; GFNIAVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX2-NEXT: vpsllw $2, %ymm2, %ymm2
 ; GFNIAVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
 ; GFNIAVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
 ; GFNIAVX2-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
@@ -2185,10 +2185,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX512VL-NEXT: vpsraw $4, %ymm5, %ymm6
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
 ; GFNIAVX512VL-NEXT: vpsraw $2, %ymm5, %ymm6
-; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
+; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm7
+; GFNIAVX512VL-NEXT: vpblendvb %ymm7, %ymm6, %ymm5, %ymm5
 ; GFNIAVX512VL-NEXT: vpsraw $1, %ymm5, %ymm6
-; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm3, %ymm3
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm3
 ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -2196,10 +2196,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX512VL-NEXT: vpsraw $4, %ymm4, %ymm5
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
 ; GFNIAVX512VL-NEXT: vpsraw $2, %ymm4, %ymm5
-; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
+; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm6
+; GFNIAVX512VL-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
 ; GFNIAVX512VL-NEXT: vpsraw $1, %ymm4, %ymm5
-; GFNIAVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm2, %ymm2
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm2
 ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
 ; GFNIAVX512VL-NEXT: vpackuswb %ymm3, %ymm2, %ymm2
@@ -2209,10 +2209,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX512VL-NEXT: vpsraw $4, %ymm4, %ymm5
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
 ; GFNIAVX512VL-NEXT: vpsraw $2, %ymm4, %ymm5
-; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm6
+; GFNIAVX512VL-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
 ; GFNIAVX512VL-NEXT: vpsraw $1, %ymm4, %ymm5
-; GFNIAVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm3, %ymm3
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
 ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3
 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -2220,10 +2220,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX512VL-NEXT: vpsraw $4, %ymm0, %ymm4
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
 ; GFNIAVX512VL-NEXT: vpsraw $2, %ymm0, %ymm4
-; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
+; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm5
+; GFNIAVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm0, %ymm0
 ; GFNIAVX512VL-NEXT: vpsraw $1, %ymm0, %ymm4
-; GFNIAVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; GFNIAVX512VL-NEXT: vpsllw $2, %ymm1, %ymm1
 ; GFNIAVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
 ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; GFNIAVX512VL-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
@@ -2239,11 +2239,11 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
 ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
 ; GFNIAVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3
-; GFNIAVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
-; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; GFNIAVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm5
+; GFNIAVX512BW-NEXT: vpmovb2m %zmm5, %k1
 ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
 ; GFNIAVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3
-; GFNIAVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; GFNIAVX512BW-NEXT: vpsllw $2, %zmm4, %zmm4
 ; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
 ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
 ; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
@@ -2253,11 +2253,11 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
 ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
 ; GFNIAVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3
-; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
-; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm4
+; GFNIAVX512BW-NEXT: vpmovb2m %zmm4, %k1
 ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
 ; GFNIAVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3
-; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
+; GFNIAVX512BW-NEXT: vpsllw $2, %zmm1, %zmm1
 ; GFNIAVX512BW-NEXT: vpmovb2m %zmm1, %k1
 ; GFNIAVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
 ; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll
index 960bbf5..df04b67 100644
--- a/llvm/test/CodeGen/X86/isel-fpclass.ll
+++ b/llvm/test/CodeGen/X86/isel-fpclass.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86-SDAGISEL
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL
 ; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL
 ; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL
+; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X64,X64-GISEL
-; FIXME: We can reuse/delete llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included.
-
-define i1 @isnone_f(float %x) {
-; X86-SDAGISEL-LABEL: isnone_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: xorl %eax, %eax
-; X86-SDAGISEL-NEXT: retl
+define i1 @isnone_f(float %x) nounwind {
+; X86-LABEL: isnone_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
 ;
 ; X64-LABEL: isnone_f:
 ; X64: # %bb.0: # %entry
@@ -28,11 +28,11 @@ entry:
 ret i1 %0
 }
-define i1 @isany_f(float %x) {
-; X86-SDAGISEL-LABEL: isany_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movb $1, %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @isany_f(float %x) nounwind {
+; X86-LABEL: isany_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: isany_f:
 ; X64: # %bb.0: # %entry
@@ -50,17 +50,17 @@ entry:
 ret i1 %0
 }
-define i1 @issignaling_f(float %x) {
-; X86-SDAGISEL-LABEL: issignaling_f:
-; X86-SDAGISEL: # %bb.0:
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X86-SDAGISEL-NEXT: setl %cl
-; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X86-SDAGISEL-NEXT: setge %al
-; X86-SDAGISEL-NEXT: andb %cl, %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @issignaling_f(float %x) nounwind {
+; X86-LABEL: issignaling_f:
+; X86: # %bb.0:
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-NEXT: setl %cl
+; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-NEXT: setge %al
+; X86-NEXT: andb %cl, %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: issignaling_f:
 ; X64: # %bb.0:
@@ -76,7 +76,6 @@ define i1 @issignaling_f(float %x) {
 ; X86-FASTISEL-LABEL: issignaling_f:
 ; X86-FASTISEL: # %bb.0:
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -87,20 +86,19 @@ define i1 @issignaling_f(float %x) {
 ; X86-FASTISEL-NEXT: setge %al
 ; X86-FASTISEL-NEXT: andb %cl, %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan"
 ret i1 %a0
 }
- define i1 @isquiet_f(float %x) {
-; X86-SDAGISEL-LABEL: isquiet_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X86-SDAGISEL-NEXT: setge %al
-; X86-SDAGISEL-NEXT: retl
+ define i1 @isquiet_f(float %x) nounwind {
+; X86-LABEL: isquiet_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-NEXT: setge %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: isquiet_f:
 ; X64: # %bb.0: # %entry
@@ -113,7 +111,6 @@ define i1 @issignaling_f(float %x) {
 ; X86-FASTISEL-LABEL: isquiet_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -121,21 +118,20 @@ define i1 @issignaling_f(float %x) {
 ; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
 ; X86-FASTISEL-NEXT: setge %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan"
 ret i1 %0
 }
-define i1 @not_isquiet_f(float %x) {
-; X86-SDAGISEL-LABEL: not_isquiet_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X86-SDAGISEL-NEXT: setl %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @not_isquiet_f(float %x) nounwind {
+; X86-LABEL: not_isquiet_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-NEXT: setl %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: not_isquiet_f:
 ; X64: # %bb.0: # %entry
@@ -148,7 +144,6 @@ define i1 @not_isquiet_f(float %x) {
 ; X86-FASTISEL-LABEL: not_isquiet_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -156,21 +151,20 @@ define i1 @not_isquiet_f(float %x) {
 ; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
 ; X86-FASTISEL-NEXT: setl %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan"
 ret i1 %0
 }
-define i1 @isinf_f(float %x) {
-; X86-SDAGISEL-LABEL: isinf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: sete %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @isinf_f(float %x) nounwind {
+; X86-LABEL: isinf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: isinf_f:
 ; X64: # %bb.0: # %entry
@@ -183,7 +177,6 @@ define i1 @isinf_f(float %x) {
 ; X86-FASTISEL-LABEL: isinf_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -191,21 +184,20 @@ define i1 @isinf_f(float %x) {
 ; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
 ; X86-FASTISEL-NEXT: sete %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
 ret i1 %0
 }
-define i1 @not_isinf_f(float %x) {
-; X86-SDAGISEL-LABEL: not_isinf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: setne %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @not_isinf_f(float %x) nounwind {
+; X86-LABEL: not_isinf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT: setne %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: not_isinf_f:
 ; X64: # %bb.0: # %entry
@@ -218,7 +210,6 @@ define i1 @not_isinf_f(float %x) {
 ; X86-FASTISEL-LABEL: not_isinf_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -226,19 +217,18 @@ define i1 @not_isinf_f(float %x) {
 ; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
 ; X86-FASTISEL-NEXT: setne %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf"
 ret i1 %0
 }
-define i1 @is_plus_inf_f(float %x) {
-; X86-SDAGISEL-LABEL: is_plus_inf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: sete %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @is_plus_inf_f(float %x) nounwind {
+; X86-LABEL: is_plus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: is_plus_inf_f:
 ; X64: # %bb.0: # %entry
@@ -250,25 +240,23 @@ define i1 @is_plus_inf_f(float %x) {
 ; X86-FASTISEL-LABEL: is_plus_inf_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
 ; X86-FASTISEL-NEXT: sete %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
 ret i1 %0
 }
-define i1 @is_minus_inf_f(float %x) {
-; X86-SDAGISEL-LABEL: is_minus_inf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
-; X86-SDAGISEL-NEXT: sete %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @is_minus_inf_f(float %x) nounwind {
+; X86-LABEL: is_minus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: is_minus_inf_f:
 ; X64: # %bb.0: # %entry
@@ -280,25 +268,23 @@ define i1 @is_minus_inf_f(float %x) {
 ; X86-FASTISEL-LABEL: is_minus_inf_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
 ; X86-FASTISEL-NEXT: sete %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
 ret i1 %0
 }
-define i1 @not_is_minus_inf_f(float %x) {
-; X86-SDAGISEL-LABEL: not_is_minus_inf_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
-; X86-SDAGISEL-NEXT: setne %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @not_is_minus_inf_f(float %x) nounwind {
+; X86-LABEL: not_is_minus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-NEXT: setne %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: not_is_minus_inf_f:
 ; X64: # %bb.0: # %entry
@@ -310,27 +296,25 @@ define i1 @not_is_minus_inf_f(float %x) {
 ; X86-FASTISEL-LABEL: not_is_minus_inf_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
 ; X86-FASTISEL-NEXT: setne %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf"
 ret i1 %0
 }
-define i1 @isfinite_f(float %x) {
-; X86-SDAGISEL-LABEL: isfinite_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: setl %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @isfinite_f(float %x) nounwind {
+; X86-LABEL: isfinite_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT: setl %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: isfinite_f:
 ; X64: # %bb.0: # %entry
@@ -343,7 +327,6 @@ define i1 @isfinite_f(float %x) {
 ; X86-FASTISEL-LABEL: isfinite_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -351,21 +334,20 @@ define i1 @isfinite_f(float %x) {
 ; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
 ; X86-FASTISEL-NEXT: setl %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
 ret i1 %0
 }
-define i1 @not_isfinite_f(float %x) {
-; X86-SDAGISEL-LABEL: not_isfinite_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: setge %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @not_isfinite_f(float %x) nounwind {
+; X86-LABEL: not_isfinite_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-NEXT: setge %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: not_isfinite_f:
 ; X64: # %bb.0: # %entry
@@ -378,7 +360,6 @@ define i1 @not_isfinite_f(float %x) {
 ; X86-FASTISEL-LABEL: not_isfinite_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
@@ -386,19 +367,18 @@ define i1 @not_isfinite_f(float %x) {
 ; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
 ; X86-FASTISEL-NEXT: setge %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite"
 ret i1 %0
 }
-define i1 @is_plus_finite_f(float %x) {
-; X86-SDAGISEL-LABEL: is_plus_finite_f:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
-; X86-SDAGISEL-NEXT: setb %al
-; X86-SDAGISEL-NEXT: retl
+define i1 @is_plus_finite_f(float %x) nounwind {
+; X86-LABEL: is_plus_finite_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-NEXT: setb %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: is_plus_finite_f:
 ; X64: # %bb.0: # %entry
@@ -410,13 +390,11 @@ define i1 @is_plus_finite_f(float %x) {
 ; X86-FASTISEL-LABEL: is_plus_finite_f:
 ; X86-FASTISEL: # %bb.0: # %entry
 ; X86-FASTISEL-NEXT: pushl %eax
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
 ; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
 ; X86-FASTISEL-NEXT: fstps (%esp)
 ; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
 ; X86-FASTISEL-NEXT: setb %al
 ; X86-FASTISEL-NEXT: popl %ecx
-; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
 ; X86-FASTISEL-NEXT: retl
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite"
@@ -424,10 +402,10 @@ entry:
 }
 define i1 @isnone_d(double %x) nounwind {
-; X86-SDAGISEL-LABEL: isnone_d:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: xorl %eax, %eax
-; X86-SDAGISEL-NEXT: retl
+; X86-LABEL: isnone_d:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
 ;
 ; X64-LABEL: isnone_d:
 ; X64: # %bb.0: # %entry
@@ -446,10 +424,10 @@ entry:
 }
 define i1 @isany_d(double %x) nounwind {
-; X86-SDAGISEL-LABEL: isany_d:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movb $1, %al
-; X86-SDAGISEL-NEXT: retl
+; X86-LABEL: isany_d:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
 ;
 ; X64-LABEL: isany_d:
 ; X64: # %bb.0: # %entry
@@ -468,10 +446,10 @@ entry:
 }
 define i1 @isnone_f80(x86_fp80 %x) nounwind {
-; X86-SDAGISEL-LABEL: isnone_f80:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: xorl %eax, %eax
-; X86-SDAGISEL-NEXT: retl
+; X86-LABEL: isnone_f80:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
 ;
 ; X64-SDAGISEL-LABEL: isnone_f80:
 ; X64-SDAGISEL: # %bb.0: # %entry
@@ -491,16 +469,21 @@ define i1 @isnone_f80(x86_fp80 %x) nounwind {
 ; X64-FASTISEL-NEXT: fstp %st(0)
 ; X64-FASTISEL-NEXT: xorl %eax, %eax
 ; X64-FASTISEL-NEXT: retq
+;
+; X64-GISEL-LABEL: isnone_f80:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: xorl %eax, %eax
+; X64-GISEL-NEXT: retq
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 0)
 ret i1 %0
 }
 define i1 @isany_f80(x86_fp80 %x) nounwind {
-; X86-SDAGISEL-LABEL: isany_f80:
-; X86-SDAGISEL: # %bb.0: # %entry
-; X86-SDAGISEL-NEXT: movb $1, %al
-; X86-SDAGISEL-NEXT: retl
+; X86-LABEL: isany_f80:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
 ;
 ; X64-SDAGISEL-LABEL: isany_f80:
 ; X64-SDAGISEL: # %bb.0: # %entry
@@ -520,6 +503,11 @@ define i1 @isany_f80(x86_fp80 %x) nounwind {
 ; X64-FASTISEL-NEXT: fstp %st(0)
 ; X64-FASTISEL-NEXT: movb $1, %al
 ; X64-FASTISEL-NEXT: retq
+;
+; X64-GISEL-LABEL: isany_f80:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movb $1, %al
+; X64-GISEL-NEXT: retq
 entry:
 %0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 1023)
 ret i1 %0
diff --git a/llvm/test/CodeGen/X86/isel-smax.ll b/llvm/test/CodeGen/X86/isel-smax.ll
index 9c9a48e..1ce0a80 100644
--- a/llvm/test/CodeGen/X86/isel-smax.ll
+++ b/llvm/test/CodeGen/X86/isel-smax.ll
@@ -1,19 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86,FASTISEL-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
 define i8 @smax_i8(i8 %a, i8 %b) nounwind readnone {
-; X64-LABEL: smax_i8:
-; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpb %al, %dil
-; X64-NEXT: cmovgl %edi, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
+; DAG-X64-LABEL: smax_i8:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: movl %esi, %eax
+; DAG-X64-NEXT: cmpb %al, %dil
+; DAG-X64-NEXT: cmovgl %edi, %eax
+; DAG-X64-NEXT: # kill: def $al killed $al killed $eax
+; DAG-X64-NEXT: retq
 ;
 ; FASTISEL-X64-LABEL: smax_i8:
 ; FASTISEL-X64: # %bb.0:
@@ -24,6 +24,17 @@ define i8 @smax_i8(i8 %a, i8 %b) nounwind readnone {
 ; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax
 ; FASTISEL-X64-NEXT: retq
 ;
+; GISEL-X64-LABEL: smax_i8:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %esi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpb %al, %dil
+; GISEL-X64-NEXT: setg %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovnew %di, %ax
+; GISEL-X64-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X64-NEXT: retq
+;
 ; X86-LABEL: smax_i8:
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
@@ -35,16 +46,20 @@ define i8 @smax_i8(i8 %a, i8 %b) nounwind readnone {
 ; X86-NEXT: .LBB0_2:
 ; X86-NEXT: retl
 ;
-; FASTISEL-X86-LABEL: smax_i8:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpb %cl, %al
-; FASTISEL-X86-NEXT: jg .LBB0_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB0_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: smax_i8:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpb %al, %cl
+; GISEL-X86-NEXT: setg %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB0_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB0_2:
+; GISEL-X86-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X86-NEXT: retl
 %ret = call i8 @llvm.smax.i8(i8 %a, i8 %b)
 ret i8 %ret
 }
@@ -57,25 +72,28 @@ define i16 @smax_i16(i16 %a, i16 %b) nounwind readnone {
 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-NEXT: retq
 ;
-; FASTISEL-X64-LABEL: smax_i16:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpw %ax, %di
-; FASTISEL-X64-NEXT: cmovgl %edi, %eax
-; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smax_i16:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpw %si, %ax
+; GISEL-X64-NEXT: setg %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovew %si, %ax
+; GISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X64-NEXT: retq
 ;
-; X86-LABEL: smax_i16:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpw %cx, %ax
-; X86-NEXT: jg .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB1_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
+; DAG-X86-LABEL: smax_i16:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: cmpw %cx, %ax
+; DAG-X86-NEXT: jg .LBB1_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: .LBB1_2:
+; DAG-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; DAG-X86-NEXT: retl
 ;
 ; FASTISEL-X86-LABEL: smax_i16:
 ; FASTISEL-X86: # %bb.0:
@@ -88,6 +106,21 @@ define i16 @smax_i16(i16 %a, i16 %b) nounwind readnone {
 ; FASTISEL-X86-NEXT: .LBB1_2:
 ; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: smax_i16:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpw %ax, %cx
+; GISEL-X86-NEXT: setg %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB1_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB1_2:
+; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X86-NEXT: retl
 %ret = call i16 @llvm.smax.i16(i16 %a, i16 %b)
 ret i16 %ret
 }
@@ -99,12 +132,15 @@ define i32 @smax_i32(i32 %a, i32 %b) nounwind readnone {
 ; X64-NEXT: cmovgl %edi, %eax
 ; X64-NEXT: retq
 ;
-; FASTISEL-X64-LABEL: smax_i32:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpl %esi, %edi
-; FASTISEL-X64-NEXT: cmovgl %edi, %eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smax_i32:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpl %esi, %edi
+; GISEL-X64-NEXT: setg %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovel %esi, %eax
+; GISEL-X64-NEXT: retq
 ;
 ; X86-LABEL: smax_i32:
 ; X86: # %bb.0:
@@ -117,16 +153,19 @@ define i32 @smax_i32(i32 %a, i32 %b) nounwind readnone {
 ; X86-NEXT: .LBB2_2:
 ; X86-NEXT: retl
 ;
-; FASTISEL-X86-LABEL: smax_i32:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpl %ecx, %eax
-; FASTISEL-X86-NEXT: jg .LBB2_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB2_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: smax_i32:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpl %eax, %ecx
+; GISEL-X86-NEXT: setg %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB2_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB2_2:
+; GISEL-X86-NEXT: retl
 %ret = call i32 @llvm.smax.i32(i32 %a, i32 %b)
 ret i32 %ret
 }
@@ -138,32 +177,35 @@ define i64 @smax_i64(i64 %a, i64 %b) nounwind readnone {
 ; X64-NEXT: cmovgq %rdi, %rax
 ; X64-NEXT: retq
 ;
-; FASTISEL-X64-LABEL: smax_i64:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movq %rsi, %rax
-; FASTISEL-X64-NEXT: cmpq %rsi, %rdi
-; FASTISEL-X64-NEXT: cmovgq %rdi, %rax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smax_i64:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movq %rdi, %rax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpq %rsi, %rdi
+; GISEL-X64-NEXT: setg %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmoveq %rsi, %rax
+; GISEL-X64-NEXT: retq
 ;
-; X86-LABEL: smax_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: jl .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: .LBB3_2:
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: retl
+; DAG-X86-LABEL: smax_i64:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: pushl %edi
+; DAG-X86-NEXT: pushl %esi
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; DAG-X86-NEXT: cmpl %eax, %ecx
+; DAG-X86-NEXT: movl %esi, %edi
+; DAG-X86-NEXT: sbbl %edx, %edi
+; DAG-X86-NEXT: jl .LBB3_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: movl %esi, %edx
+; DAG-X86-NEXT: .LBB3_2:
+; DAG-X86-NEXT: popl %esi
+; DAG-X86-NEXT: popl %edi
+; DAG-X86-NEXT: retl
 ;
 ; FASTISEL-X86-LABEL: smax_i64:
 ; FASTISEL-X86: # %bb.0:
@@ -184,6 +226,44 @@ define i64 @smax_i64(i64 %a, i64 %b) nounwind readnone {
 ; FASTISEL-X86-NEXT: popl %esi
 ; FASTISEL-X86-NEXT: popl %edi
 ; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: smax_i64:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: pushl %ebp
+; GISEL-X86-NEXT: pushl %ebx
+; GISEL-X86-NEXT: pushl %edi
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: cmpl %eax, %esi
+; GISEL-X86-NEXT: seta %bl
+; GISEL-X86-NEXT: xorl %ecx, %ecx
+; GISEL-X86-NEXT: cmpl %edx, %ebp
+; GISEL-X86-NEXT: setg %bh
+; GISEL-X86-NEXT: sete %cl
+; GISEL-X86-NEXT: testl %ecx, %ecx
+; GISEL-X86-NEXT: je .LBB3_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movb %bl, %bh
+; GISEL-X86-NEXT: .LBB3_2:
+; GISEL-X86-NEXT: movzbl %bh, %edi
+; GISEL-X86-NEXT: andl $1, %edi
+; GISEL-X86-NEXT: je .LBB3_4
+; GISEL-X86-NEXT: # %bb.3:
+; GISEL-X86-NEXT: movl %esi, %eax
+; GISEL-X86-NEXT: .LBB3_4:
+; GISEL-X86-NEXT: testl %edi, %edi
+; GISEL-X86-NEXT: je .LBB3_6
+; GISEL-X86-NEXT: # %bb.5:
+; GISEL-X86-NEXT: movl %ebp, %edx
+; GISEL-X86-NEXT: .LBB3_6:
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: popl %edi
+; GISEL-X86-NEXT: popl %ebx
+; GISEL-X86-NEXT: popl %ebp
+; GISEL-X86-NEXT: retl
 %ret = call i64 @llvm.smax.i64(i64 %a, i64 %b)
 ret i64 %ret
 }
diff --git a/llvm/test/CodeGen/X86/isel-smin.ll b/llvm/test/CodeGen/X86/isel-smin.ll
index 7349a7c..bbed3c3 100644
--- a/llvm/test/CodeGen/X86/isel-smin.ll
+++ b/llvm/test/CodeGen/X86/isel-smin.ll
@@ -1,19 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86,FASTISEL-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
 define i8 @smin_i8(i8 %a, i8 %b) nounwind readnone {
-; X64-LABEL: smin_i8:
-; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpb %al, %dil
-; X64-NEXT: cmovll %edi, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
+; DAG-X64-LABEL: smin_i8:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: movl %esi, %eax
+; DAG-X64-NEXT: cmpb %al, %dil
+; DAG-X64-NEXT: cmovll %edi, %eax
+; DAG-X64-NEXT: # kill: def $al killed $al killed $eax
+; DAG-X64-NEXT: retq
 ;
 ; FASTISEL-X64-LABEL: smin_i8:
 ; FASTISEL-X64: # %bb.0:
@@ -24,6 +24,17 @@ define i8 @smin_i8(i8 %a, i8 %b) nounwind readnone {
 ; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax
 ; FASTISEL-X64-NEXT: retq
 ;
+; GISEL-X64-LABEL: smin_i8:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %esi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpb %al, %dil
+; GISEL-X64-NEXT: setl %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovnew %di, %ax
+; GISEL-X64-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X64-NEXT: retq
+;
 ; X86-LABEL: smin_i8:
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
@@ -35,16 +46,20 @@ define i8 @smin_i8(i8 %a, i8 %b) nounwind readnone {
 ; X86-NEXT: .LBB0_2:
 ; X86-NEXT: retl
 ;
-; FASTISEL-X86-LABEL: smin_i8:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpb %cl, %al
-; FASTISEL-X86-NEXT: jl .LBB0_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB0_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: smin_i8:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpb %al, %cl
+; GISEL-X86-NEXT: setl %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB0_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB0_2:
+; GISEL-X86-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X86-NEXT: retl
 %ret = call i8 @llvm.smin.i8(i8 %a, i8 %b)
 ret i8 %ret
 }
@@ -57,25 +72,28 @@ define i16 @smin_i16(i16 %a, i16 %b) nounwind readnone {
 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-NEXT: retq
 ;
-; FASTISEL-X64-LABEL: smin_i16:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpw %ax, %di
-; FASTISEL-X64-NEXT: cmovll %edi, %eax
-; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smin_i16:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpw %si, %ax
+; GISEL-X64-NEXT: setl %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovew %si, %ax
+; GISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X64-NEXT: retq
 ;
-; X86-LABEL: smin_i16:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpw %cx, %ax
-; X86-NEXT: jl .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB1_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: retl
+; DAG-X86-LABEL: smin_i16:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: cmpw %cx, %ax
+; DAG-X86-NEXT: jl .LBB1_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: .LBB1_2:
+; DAG-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; DAG-X86-NEXT: retl
 ;
 ; FASTISEL-X86-LABEL: smin_i16:
 ; FASTISEL-X86: # %bb.0:
@@ -88,6 +106,21 @@ define i16 @smin_i16(i16 %a, i16 %b) nounwind readnone {
 ; FASTISEL-X86-NEXT: .LBB1_2:
 ; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: smin_i16:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpw %ax, %cx
+; GISEL-X86-NEXT: setl %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB1_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB1_2:
+; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax
+; GISEL-X86-NEXT: retl
 %ret = call i16 @llvm.smin.i16(i16 %a, i16 %b)
 ret i16 %ret
 }
@@ -99,12 +132,15 @@ define i32 @smin_i32(i32 %a, i32 %b) nounwind readnone {
 ; X64-NEXT: cmovll %edi, %eax
 ; X64-NEXT: retq
 ;
-; FASTISEL-X64-LABEL: smin_i32:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpl %esi, %edi
-; FASTISEL-X64-NEXT: cmovll %edi, %eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smin_i32:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpl %esi, %edi
+; GISEL-X64-NEXT: setl %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovel %esi, %eax
+; GISEL-X64-NEXT: retq
 ;
 ; X86-LABEL: smin_i32:
 ; X86: # %bb.0:
@@ -117,16 +153,19 @@ define i32 @smin_i32(i32 %a, i32 %b) nounwind readnone {
 ; X86-NEXT: .LBB2_2:
 ; X86-NEXT: retl
 ;
-; FASTISEL-X86-LABEL: smin_i32:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpl %ecx, %eax
-; FASTISEL-X86-NEXT: jl .LBB2_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB2_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: smin_i32:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpl %eax, %ecx
+; GISEL-X86-NEXT: setl %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB2_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB2_2:
+; GISEL-X86-NEXT: retl
 %ret = call i32 @llvm.smin.i32(i32 %a, i32 %b)
 ret i32 %ret
 }
@@ -138,32 +177,35 @@ define i64 @smin_i64(i64 %a, i64 %b) nounwind readnone {
 ; X64-NEXT: cmovlq %rdi, %rax
 ; X64-NEXT: retq
 ;
-; FASTISEL-X64-LABEL: smin_i64:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movq %rsi, %rax
-; FASTISEL-X64-NEXT: cmpq %rsi, %rdi
-; FASTISEL-X64-NEXT: cmovlq %rdi, %rax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: smin_i64:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movq %rdi, %rax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpq %rsi, %rdi
+; GISEL-X64-NEXT: setl %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmoveq %rsi, %rax
+; GISEL-X64-NEXT: retq
 ;
-; X86-LABEL: smin_i64:
-; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: jl .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: .LBB3_2:
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: retl
+; DAG-X86-LABEL: smin_i64:
+; DAG-X86: # %bb.0:
+; DAG-X86-NEXT: pushl %edi
+; DAG-X86-NEXT: pushl %esi
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; DAG-X86-NEXT: cmpl %ecx, %eax
+; DAG-X86-NEXT: movl %edx, %edi
+; DAG-X86-NEXT: sbbl %esi, %edi
+; DAG-X86-NEXT: jl .LBB3_2
+; DAG-X86-NEXT: # %bb.1:
+; DAG-X86-NEXT: movl %ecx, %eax
+; DAG-X86-NEXT: movl %esi, %edx
+; DAG-X86-NEXT: .LBB3_2:
+; DAG-X86-NEXT: popl %esi
+; DAG-X86-NEXT: popl %edi
+; DAG-X86-NEXT: retl
 ;
 ; FASTISEL-X86-LABEL: smin_i64:
 ; FASTISEL-X86: # %bb.0:
@@ -184,6 +226,44 @@ define i64 @smin_i64(i64 %a, i64 %b) nounwind readnone {
 ; FASTISEL-X86-NEXT: popl %esi
 ; FASTISEL-X86-NEXT: popl %edi
 ; FASTISEL-X86-NEXT: retl
+;
+; GISEL-X86-LABEL: smin_i64:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: pushl %ebp
+; GISEL-X86-NEXT: pushl %ebx
+; GISEL-X86-NEXT: pushl %edi
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; GISEL-X86-NEXT: cmpl %eax, %esi
+; GISEL-X86-NEXT: setb %bl
+; GISEL-X86-NEXT: xorl %ecx, %ecx
+; GISEL-X86-NEXT: cmpl %edx, %ebp
+; GISEL-X86-NEXT: setl %bh
+; GISEL-X86-NEXT: sete %cl
+; GISEL-X86-NEXT: testl %ecx, %ecx
+; GISEL-X86-NEXT: je .LBB3_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movb %bl, %bh
+; GISEL-X86-NEXT: .LBB3_2:
+; GISEL-X86-NEXT: movzbl %bh, %edi
+; GISEL-X86-NEXT: andl $1, %edi
+; GISEL-X86-NEXT: je .LBB3_4
+; GISEL-X86-NEXT: # %bb.3:
+; GISEL-X86-NEXT: movl %esi, %eax
+; GISEL-X86-NEXT: .LBB3_4:
+; GISEL-X86-NEXT: testl %edi, %edi
+; GISEL-X86-NEXT: je .LBB3_6
+; GISEL-X86-NEXT: # %bb.5:
+; GISEL-X86-NEXT: movl %ebp, %edx
+; GISEL-X86-NEXT: .LBB3_6:
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: popl %edi
+; GISEL-X86-NEXT: popl %ebx
+; GISEL-X86-NEXT: popl %ebp
+; GISEL-X86-NEXT: retl
 %ret = call i64 @llvm.smin.i64(i64 %a, i64 %b)
 ret i64 %ret
 }
diff --git a/llvm/test/CodeGen/X86/isel-umax.ll b/llvm/test/CodeGen/X86/isel-umax.ll
index a90456c..990af26 100644
--- a/llvm/test/CodeGen/X86/isel-umax.ll
+++ b/llvm/test/CodeGen/X86/isel-umax.ll
@@ -1,19 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X64
-; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86
-; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86,FASTISEL-X86
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86
 define i8 @umax_i8(i8 %a, i8 %b) nounwind readnone {
-; X64-LABEL: umax_i8:
-; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpb %al, %dil
-; X64-NEXT: cmoval %edi, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
-; X64-NEXT: retq
+; DAG-X64-LABEL: umax_i8:
+; DAG-X64: # %bb.0:
+; DAG-X64-NEXT: movl %esi, %eax
+; DAG-X64-NEXT: cmpb %al, %dil
+; DAG-X64-NEXT: cmoval %edi, %eax
+; DAG-X64-NEXT: # kill: def $al killed $al killed $eax
+; DAG-X64-NEXT: retq
 ;
 ; FASTISEL-X64-LABEL: umax_i8:
 ; FASTISEL-X64: # %bb.0:
@@ -24,6 +24,17 @@ define i8 @umax_i8(i8 %a, i8 %b) nounwind readnone {
 ; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax
 ; FASTISEL-X64-NEXT: retq
 ;
+; GISEL-X64-LABEL: umax_i8:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %esi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpb %al, %dil
+; GISEL-X64-NEXT: seta %cl
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovnew %di, %ax
+; GISEL-X64-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X64-NEXT: retq
+;
 ; X86-LABEL: umax_i8:
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
@@ -35,16 +46,20 @@ define i8 @umax_i8(i8 %a, i8 %b) nounwind readnone {
 ; X86-NEXT: .LBB0_2:
 ; X86-NEXT: retl
 ;
-; FASTISEL-X86-LABEL: umax_i8:
-; FASTISEL-X86: # %bb.0:
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; FASTISEL-X86-NEXT: cmpb %cl, %al
-; FASTISEL-X86-NEXT: ja .LBB0_2
-; FASTISEL-X86-NEXT: # %bb.1:
-; FASTISEL-X86-NEXT: movl %ecx, %eax
-; FASTISEL-X86-NEXT: .LBB0_2:
-; FASTISEL-X86-NEXT: retl
+; GISEL-X86-LABEL: umax_i8:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GISEL-X86-NEXT: xorl %edx, %edx
+; GISEL-X86-NEXT: cmpb %al, %cl
+; GISEL-X86-NEXT: seta %dl
+; GISEL-X86-NEXT: andl $1, %edx
+; GISEL-X86-NEXT: je .LBB0_2
+; GISEL-X86-NEXT: # %bb.1:
+; GISEL-X86-NEXT: movl %ecx, %eax
+; GISEL-X86-NEXT: .LBB0_2:
+; GISEL-X86-NEXT: # kill: def $al killed $al killed $eax
+; GISEL-X86-NEXT: retl
 %ret = call i8 @llvm.umax.i8(i8 %a, i8 %b)
 ret i8 %ret
 }
@@ -57,25 +72,28 @@ define i16 @umax_i16(i16 %a, i16 %b) nounwind readnone {
 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-NEXT: retq
 ;
-; FASTISEL-X64-LABEL: umax_i16:
-; FASTISEL-X64: # %bb.0:
-; FASTISEL-X64-NEXT: movl %esi, %eax
-; FASTISEL-X64-NEXT: cmpw %ax, %di
-; FASTISEL-X64-NEXT: cmoval %edi, %eax
-; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax
-; FASTISEL-X64-NEXT: retq
+; GISEL-X64-LABEL: umax_i16:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: movl %edi, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT:
cmpw %si, %ax +; GISEL-X64-NEXT: seta %cl +; GISEL-X64-NEXT: andl $1, %ecx +; GISEL-X64-NEXT: cmovew %si, %ax +; GISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X64-NEXT: retq ; -; X86-LABEL: umax_i16: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpw %cx, %ax -; X86-NEXT: ja .LBB1_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB1_2: -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: retl +; DAG-X86-LABEL: umax_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: cmpw %cx, %ax +; DAG-X86-NEXT: ja .LBB1_2 +; DAG-X86-NEXT: # %bb.1: +; DAG-X86-NEXT: movl %ecx, %eax +; DAG-X86-NEXT: .LBB1_2: +; DAG-X86-NEXT: # kill: def $ax killed $ax killed $eax +; DAG-X86-NEXT: retl ; ; FASTISEL-X86-LABEL: umax_i16: ; FASTISEL-X86: # %bb.0: @@ -88,6 +106,21 @@ define i16 @umax_i16(i16 %a, i16 %b) nounwind readnone { ; FASTISEL-X86-NEXT: .LBB1_2: ; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax ; FASTISEL-X86-NEXT: retl +; +; GISEL-X86-LABEL: umax_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: cmpw %ax, %cx +; GISEL-X86-NEXT: seta %dl +; GISEL-X86-NEXT: andl $1, %edx +; GISEL-X86-NEXT: je .LBB1_2 +; GISEL-X86-NEXT: # %bb.1: +; GISEL-X86-NEXT: movl %ecx, %eax +; GISEL-X86-NEXT: .LBB1_2: +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X86-NEXT: retl %ret = call i16 @llvm.umax.i16(i16 %a, i16 %b) ret i16 %ret } @@ -99,12 +132,15 @@ define i32 @umax_i32(i32 %a, i32 %b) nounwind readnone { ; X64-NEXT: cmoval %edi, %eax ; X64-NEXT: retq ; -; FASTISEL-X64-LABEL: umax_i32: -; FASTISEL-X64: # %bb.0: -; FASTISEL-X64-NEXT: movl %esi, %eax -; FASTISEL-X64-NEXT: cmpl %esi, %edi -; FASTISEL-X64-NEXT: cmoval %edi, %eax -; FASTISEL-X64-NEXT: retq +; GISEL-X64-LABEL: umax_i32: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movl %edi, %eax +; GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpl %esi, %edi +; GISEL-X64-NEXT: seta %cl +; GISEL-X64-NEXT: andl $1, %ecx +; GISEL-X64-NEXT: cmovel %esi, %eax +; GISEL-X64-NEXT: retq ; ; X86-LABEL: umax_i32: ; X86: # %bb.0: @@ -117,16 +153,19 @@ define i32 @umax_i32(i32 %a, i32 %b) nounwind readnone { ; X86-NEXT: .LBB2_2: ; X86-NEXT: retl ; -; FASTISEL-X86-LABEL: umax_i32: -; FASTISEL-X86: # %bb.0: -; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; FASTISEL-X86-NEXT: cmpl %ecx, %eax -; FASTISEL-X86-NEXT: ja .LBB2_2 -; FASTISEL-X86-NEXT: # %bb.1: -; FASTISEL-X86-NEXT: movl %ecx, %eax -; FASTISEL-X86-NEXT: .LBB2_2: -; FASTISEL-X86-NEXT: retl +; GISEL-X86-LABEL: umax_i32: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: cmpl %eax, %ecx +; GISEL-X86-NEXT: seta %dl +; GISEL-X86-NEXT: andl $1, %edx +; GISEL-X86-NEXT: je .LBB2_2 +; GISEL-X86-NEXT: # %bb.1: +; GISEL-X86-NEXT: movl %ecx, %eax +; GISEL-X86-NEXT: .LBB2_2: +; GISEL-X86-NEXT: retl %ret = call i32 @llvm.umax.i32(i32 %a, i32 %b) ret i32 %ret } @@ -138,32 +177,35 @@ define i64 @umax_i64(i64 %a, i64 %b) nounwind readnone { ; X64-NEXT: cmovaq %rdi, %rax ; X64-NEXT: retq ; -; FASTISEL-X64-LABEL: umax_i64: -; FASTISEL-X64: # %bb.0: -; FASTISEL-X64-NEXT: movq %rsi, %rax -; FASTISEL-X64-NEXT: cmpq %rsi, %rdi -; 
FASTISEL-X64-NEXT: cmovaq %rdi, %rax -; FASTISEL-X64-NEXT: retq +; GISEL-X64-LABEL: umax_i64: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movq %rdi, %rax +; GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpq %rsi, %rdi +; GISEL-X64-NEXT: seta %cl +; GISEL-X64-NEXT: andl $1, %ecx +; GISEL-X64-NEXT: cmoveq %rsi, %rax +; GISEL-X64-NEXT: retq ; -; X86-LABEL: umax_i64: -; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: movl %esi, %edi -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: jb .LBB3_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl %esi, %edx -; X86-NEXT: .LBB3_2: -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: retl +; DAG-X86-LABEL: umax_i64: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: pushl %edi +; DAG-X86-NEXT: pushl %esi +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; DAG-X86-NEXT: cmpl %eax, %ecx +; DAG-X86-NEXT: movl %esi, %edi +; DAG-X86-NEXT: sbbl %edx, %edi +; DAG-X86-NEXT: jb .LBB3_2 +; DAG-X86-NEXT: # %bb.1: +; DAG-X86-NEXT: movl %ecx, %eax +; DAG-X86-NEXT: movl %esi, %edx +; DAG-X86-NEXT: .LBB3_2: +; DAG-X86-NEXT: popl %esi +; DAG-X86-NEXT: popl %edi +; DAG-X86-NEXT: retl ; ; FASTISEL-X86-LABEL: umax_i64: ; FASTISEL-X86: # %bb.0: @@ -184,6 +226,44 @@ define i64 @umax_i64(i64 %a, i64 %b) nounwind readnone { ; FASTISEL-X86-NEXT: popl %esi ; FASTISEL-X86-NEXT: popl %edi ; FASTISEL-X86-NEXT: retl +; +; GISEL-X86-LABEL: umax_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %ebp +; GISEL-X86-NEXT: pushl %ebx +; GISEL-X86-NEXT: pushl %edi +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: cmpl %eax, %esi +; GISEL-X86-NEXT: seta %bl +; GISEL-X86-NEXT: xorl %ecx, %ecx +; GISEL-X86-NEXT: cmpl %edx, %ebp +; GISEL-X86-NEXT: seta %bh +; GISEL-X86-NEXT: sete %cl +; GISEL-X86-NEXT: testl %ecx, %ecx +; GISEL-X86-NEXT: je .LBB3_2 +; GISEL-X86-NEXT: # %bb.1: +; GISEL-X86-NEXT: movb %bl, %bh +; GISEL-X86-NEXT: .LBB3_2: +; GISEL-X86-NEXT: movzbl %bh, %edi +; GISEL-X86-NEXT: andl $1, %edi +; GISEL-X86-NEXT: je .LBB3_4 +; GISEL-X86-NEXT: # %bb.3: +; GISEL-X86-NEXT: movl %esi, %eax +; GISEL-X86-NEXT: .LBB3_4: +; GISEL-X86-NEXT: testl %edi, %edi +; GISEL-X86-NEXT: je .LBB3_6 +; GISEL-X86-NEXT: # %bb.5: +; GISEL-X86-NEXT: movl %ebp, %edx +; GISEL-X86-NEXT: .LBB3_6: +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: popl %edi +; GISEL-X86-NEXT: popl %ebx +; GISEL-X86-NEXT: popl %ebp +; GISEL-X86-NEXT: retl %ret = call i64 @llvm.umax.i64(i64 %a, i64 %b) ret i64 %ret } diff --git a/llvm/test/CodeGen/X86/isel-umin.ll b/llvm/test/CodeGen/X86/isel-umin.ll index 53a0b27..1710b9f 100644 --- a/llvm/test/CodeGen/X86/isel-umin.ll +++ b/llvm/test/CodeGen/X86/isel-umin.ll @@ -1,19 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X64 -; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s 
--check-prefixes=X64 -; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 -; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=FASTISEL-X86 -; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64,DAG-X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 +; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86,DAG-X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X86,FASTISEL-X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 define i8 @umin_i8(i8 %a, i8 %b) nounwind readnone { -; X64-LABEL: umin_i8: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil -; X64-NEXT: cmovbl %edi, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq +; DAG-X64-LABEL: umin_i8: +; DAG-X64: # %bb.0: +; DAG-X64-NEXT: movl %esi, %eax +; DAG-X64-NEXT: cmpb %al, %dil +; DAG-X64-NEXT: cmovbl %edi, %eax +; DAG-X64-NEXT: # kill: def $al killed $al killed $eax +; DAG-X64-NEXT: retq ; ; FASTISEL-X64-LABEL: umin_i8: ; FASTISEL-X64: # %bb.0: @@ -24,6 +24,17 @@ define i8 @umin_i8(i8 %a, i8 %b) nounwind readnone { ; FASTISEL-X64-NEXT: # kill: def $al killed $al killed $eax ; FASTISEL-X64-NEXT: retq ; +; GISEL-X64-LABEL: umin_i8: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movl %esi, %eax +; GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpb %al, %dil +; GISEL-X64-NEXT: setb %cl +; GISEL-X64-NEXT: andl $1, %ecx +; GISEL-X64-NEXT: cmovnew %di, %ax +; GISEL-X64-NEXT: # kill: def $al killed $al killed $eax +; GISEL-X64-NEXT: retq +; ; X86-LABEL: umin_i8: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx @@ -35,16 +46,20 @@ define i8 @umin_i8(i8 %a, i8 %b) nounwind readnone { ; X86-NEXT: .LBB0_2: ; X86-NEXT: retl ; -; FASTISEL-X86-LABEL: umin_i8: -; FASTISEL-X86: # %bb.0: -; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; FASTISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; FASTISEL-X86-NEXT: cmpb %cl, %al -; FASTISEL-X86-NEXT: jb .LBB0_2 -; FASTISEL-X86-NEXT: # %bb.1: -; FASTISEL-X86-NEXT: movl %ecx, %eax -; FASTISEL-X86-NEXT: .LBB0_2: -; FASTISEL-X86-NEXT: retl +; GISEL-X86-LABEL: umin_i8: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: cmpb %al, %cl +; GISEL-X86-NEXT: setb %dl +; GISEL-X86-NEXT: andl $1, %edx +; GISEL-X86-NEXT: je .LBB0_2 +; GISEL-X86-NEXT: # %bb.1: +; GISEL-X86-NEXT: movl %ecx, %eax +; GISEL-X86-NEXT: .LBB0_2: +; GISEL-X86-NEXT: # kill: def $al killed $al killed $eax +; GISEL-X86-NEXT: retl %ret = call i8 @llvm.umin.i8(i8 %a, i8 %b) ret i8 %ret } @@ -57,25 +72,28 @@ define i16 @umin_i16(i16 %a, i16 %b) nounwind readnone { ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; -; FASTISEL-X64-LABEL: umin_i16: -; FASTISEL-X64: # %bb.0: -; FASTISEL-X64-NEXT: movl %esi, %eax -; FASTISEL-X64-NEXT: cmpw %ax, %di -; FASTISEL-X64-NEXT: cmovbl %edi, %eax -; FASTISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax -; FASTISEL-X64-NEXT: retq +; GISEL-X64-LABEL: umin_i16: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movl %edi, %eax +; 
GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpw %si, %ax +; GISEL-X64-NEXT: setb %cl +; GISEL-X64-NEXT: andl $1, %ecx +; GISEL-X64-NEXT: cmovew %si, %ax +; GISEL-X64-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X64-NEXT: retq ; -; X86-LABEL: umin_i16: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpw %cx, %ax -; X86-NEXT: jb .LBB1_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB1_2: -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: retl +; DAG-X86-LABEL: umin_i16: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: cmpw %cx, %ax +; DAG-X86-NEXT: jb .LBB1_2 +; DAG-X86-NEXT: # %bb.1: +; DAG-X86-NEXT: movl %ecx, %eax +; DAG-X86-NEXT: .LBB1_2: +; DAG-X86-NEXT: # kill: def $ax killed $ax killed $eax +; DAG-X86-NEXT: retl ; ; FASTISEL-X86-LABEL: umin_i16: ; FASTISEL-X86: # %bb.0: @@ -88,6 +106,21 @@ define i16 @umin_i16(i16 %a, i16 %b) nounwind readnone { ; FASTISEL-X86-NEXT: .LBB1_2: ; FASTISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax ; FASTISEL-X86-NEXT: retl +; +; GISEL-X86-LABEL: umin_i16: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: cmpw %ax, %cx +; GISEL-X86-NEXT: setb %dl +; GISEL-X86-NEXT: andl $1, %edx +; GISEL-X86-NEXT: je .LBB1_2 +; GISEL-X86-NEXT: # %bb.1: +; GISEL-X86-NEXT: movl %ecx, %eax +; GISEL-X86-NEXT: .LBB1_2: +; GISEL-X86-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-X86-NEXT: retl %ret = call i16 @llvm.umin.i16(i16 %a, i16 %b) ret i16 %ret } @@ -99,12 +132,15 @@ define i32 @umin_i32(i32 %a, i32 %b) nounwind readnone { ; X64-NEXT: cmovbl %edi, %eax ; X64-NEXT: retq ; -; FASTISEL-X64-LABEL: umin_i32: -; FASTISEL-X64: # %bb.0: -; FASTISEL-X64-NEXT: movl %esi, %eax -; FASTISEL-X64-NEXT: cmpl %esi, %edi -; FASTISEL-X64-NEXT: cmovbl %edi, %eax -; FASTISEL-X64-NEXT: retq +; GISEL-X64-LABEL: umin_i32: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movl %edi, %eax +; GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpl %esi, %edi +; GISEL-X64-NEXT: setb %cl +; GISEL-X64-NEXT: andl $1, %ecx +; GISEL-X64-NEXT: cmovel %esi, %eax +; GISEL-X64-NEXT: retq ; ; X86-LABEL: umin_i32: ; X86: # %bb.0: @@ -117,16 +153,19 @@ define i32 @umin_i32(i32 %a, i32 %b) nounwind readnone { ; X86-NEXT: .LBB2_2: ; X86-NEXT: retl ; -; FASTISEL-X86-LABEL: umin_i32: -; FASTISEL-X86: # %bb.0: -; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; FASTISEL-X86-NEXT: cmpl %ecx, %eax -; FASTISEL-X86-NEXT: jb .LBB2_2 -; FASTISEL-X86-NEXT: # %bb.1: -; FASTISEL-X86-NEXT: movl %ecx, %eax -; FASTISEL-X86-NEXT: .LBB2_2: -; FASTISEL-X86-NEXT: retl +; GISEL-X86-LABEL: umin_i32: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: xorl %edx, %edx +; GISEL-X86-NEXT: cmpl %eax, %ecx +; GISEL-X86-NEXT: setb %dl +; GISEL-X86-NEXT: andl $1, %edx +; GISEL-X86-NEXT: je .LBB2_2 +; GISEL-X86-NEXT: # %bb.1: +; GISEL-X86-NEXT: movl %ecx, %eax +; GISEL-X86-NEXT: .LBB2_2: +; GISEL-X86-NEXT: retl %ret = call i32 @llvm.umin.i32(i32 %a, i32 %b) ret i32 %ret } @@ -138,32 +177,35 @@ define i64 @umin_i64(i64 %a, i64 %b) nounwind readnone { ; X64-NEXT: cmovbq %rdi, %rax ; X64-NEXT: retq ; -; FASTISEL-X64-LABEL: umin_i64: -; FASTISEL-X64: # %bb.0: -; FASTISEL-X64-NEXT: movq %rsi, 
%rax -; FASTISEL-X64-NEXT: cmpq %rsi, %rdi -; FASTISEL-X64-NEXT: cmovbq %rdi, %rax -; FASTISEL-X64-NEXT: retq +; GISEL-X64-LABEL: umin_i64: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movq %rdi, %rax +; GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpq %rsi, %rdi +; GISEL-X64-NEXT: setb %cl +; GISEL-X64-NEXT: andl $1, %ecx +; GISEL-X64-NEXT: cmoveq %rsi, %rax +; GISEL-X64-NEXT: retq ; -; X86-LABEL: umin_i64: -; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: movl %edx, %edi -; X86-NEXT: sbbl %esi, %edi -; X86-NEXT: jb .LBB3_2 -; X86-NEXT: # %bb.1: -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl %esi, %edx -; X86-NEXT: .LBB3_2: -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: retl +; DAG-X86-LABEL: umin_i64: +; DAG-X86: # %bb.0: +; DAG-X86-NEXT: pushl %edi +; DAG-X86-NEXT: pushl %esi +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; DAG-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; DAG-X86-NEXT: cmpl %ecx, %eax +; DAG-X86-NEXT: movl %edx, %edi +; DAG-X86-NEXT: sbbl %esi, %edi +; DAG-X86-NEXT: jb .LBB3_2 +; DAG-X86-NEXT: # %bb.1: +; DAG-X86-NEXT: movl %ecx, %eax +; DAG-X86-NEXT: movl %esi, %edx +; DAG-X86-NEXT: .LBB3_2: +; DAG-X86-NEXT: popl %esi +; DAG-X86-NEXT: popl %edi +; DAG-X86-NEXT: retl ; ; FASTISEL-X86-LABEL: umin_i64: ; FASTISEL-X86: # %bb.0: @@ -184,6 +226,44 @@ define i64 @umin_i64(i64 %a, i64 %b) nounwind readnone { ; FASTISEL-X86-NEXT: popl %esi ; FASTISEL-X86-NEXT: popl %edi ; FASTISEL-X86-NEXT: retl +; +; GISEL-X86-LABEL: umin_i64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: pushl %ebp +; GISEL-X86-NEXT: pushl %ebx +; GISEL-X86-NEXT: pushl %edi +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: cmpl %eax, %esi +; GISEL-X86-NEXT: setb %bl +; GISEL-X86-NEXT: xorl %ecx, %ecx +; GISEL-X86-NEXT: cmpl %edx, %ebp +; GISEL-X86-NEXT: setb %bh +; GISEL-X86-NEXT: sete %cl +; GISEL-X86-NEXT: testl %ecx, %ecx +; GISEL-X86-NEXT: je .LBB3_2 +; GISEL-X86-NEXT: # %bb.1: +; GISEL-X86-NEXT: movb %bl, %bh +; GISEL-X86-NEXT: .LBB3_2: +; GISEL-X86-NEXT: movzbl %bh, %edi +; GISEL-X86-NEXT: andl $1, %edi +; GISEL-X86-NEXT: je .LBB3_4 +; GISEL-X86-NEXT: # %bb.3: +; GISEL-X86-NEXT: movl %esi, %eax +; GISEL-X86-NEXT: .LBB3_4: +; GISEL-X86-NEXT: testl %edi, %edi +; GISEL-X86-NEXT: je .LBB3_6 +; GISEL-X86-NEXT: # %bb.5: +; GISEL-X86-NEXT: movl %ebp, %edx +; GISEL-X86-NEXT: .LBB3_6: +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: popl %edi +; GISEL-X86-NEXT: popl %ebx +; GISEL-X86-NEXT: popl %ebp +; GISEL-X86-NEXT: retl %ret = call i64 @llvm.umin.i64(i64 %a, i64 %b) ret i64 %ret } diff --git a/llvm/test/CodeGen/X86/logic-shift.ll b/llvm/test/CodeGen/X86/logic-shift.ll index 96e63d1..104151c 100644 --- a/llvm/test/CodeGen/X86/logic-shift.ll +++ b/llvm/test/CodeGen/X86/logic-shift.ll @@ -129,10 +129,10 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, < ; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5 -; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 -; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 +; CHECK-NEXT: 
vpaddw %xmm4, %xmm4, %xmm6 +; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5 -; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vpsllw $2, %xmm4, %xmm4 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1 ; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -140,10 +140,10 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, < ; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4 -; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm5 +; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4 -; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpsllw $2, %xmm2, %xmm2 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -413,10 +413,10 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, ; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5 -; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 -; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 +; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm6 +; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5 -; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vpsllw $2, %xmm4, %xmm4 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1 ; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -424,10 +424,10 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, ; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4 -; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm5 +; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4 -; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpsllw $2, %xmm2, %xmm2 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -697,10 +697,10 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, ; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5 -; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 -; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 +; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm6 +; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5 -; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vpsllw $2, %xmm4, %xmm4 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1 ; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -708,10 +708,10 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, ; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4 -; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm5 +; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: vpsraw $1, 
%xmm0, %xmm4 -; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpsllw $2, %xmm2, %xmm2 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/pr161693.ll b/llvm/test/CodeGen/X86/pr161693.ll new file mode 100644 index 0000000..de8188f --- /dev/null +++ b/llvm/test/CodeGen/X86/pr161693.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s + +define void @PR161693() #0 { +; CHECK-LABEL: PR161693: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: movzbl (%rax), %eax +; CHECK-NEXT: andb $-33, %al +; CHECK-NEXT: addb $-71, %al +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpb $-6, %al +; CHECK-NEXT: setb %cl +; CHECK-NEXT: leal (%rcx,%rcx), %edx +; CHECK-NEXT: orb %cl, %dl +; CHECK-NEXT: leal (,%rdx,4), %ecx +; CHECK-NEXT: orb %dl, %cl +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: retq +start: + br label %loop + +loop: + %.val.i.i89 = load <16 x i8>, ptr poison, align 1 + %.not49.i = icmp ult <16 x i8> zeroinitializer, splat (i8 -10) + %i = and <16 x i8> %.val.i.i89, splat (i8 -33) + %i1 = add <16 x i8> %i, splat (i8 -71) + %.not51.i = icmp ult <16 x i8> %i1, splat (i8 -6) + %.not46.i = and <16 x i1> %.not49.i, %.not51.i + %i2 = bitcast <16 x i1> %.not46.i to i16 + %_0.i = icmp eq i16 %i2, 0 + br i1 %_0.i, label %loop, label %exit + +exit: + ret void +} + +attributes #0 = { "target-features"="+soft-float" } diff --git a/llvm/test/CodeGen/X86/prefer-avx256-shift.ll b/llvm/test/CodeGen/X86/prefer-avx256-shift.ll index bf04c8d..63bbac12 100644 --- a/llvm/test/CodeGen/X86/prefer-avx256-shift.ll +++ b/llvm/test/CodeGen/X86/prefer-avx256-shift.ll @@ -302,10 +302,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX256-NEXT: vpsraw $4, %ymm3, %ymm4 ; AVX256-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 ; AVX256-NEXT: vpsraw $2, %ymm3, %ymm4 -; AVX256-NEXT: vpaddw %ymm2, %ymm2, %ymm2 -; AVX256-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 +; AVX256-NEXT: vpaddw %ymm2, %ymm2, %ymm5 +; AVX256-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3 ; AVX256-NEXT: vpsraw $1, %ymm3, %ymm4 -; AVX256-NEXT: vpaddw %ymm2, %ymm2, %ymm2 +; AVX256-NEXT: vpsllw $2, %ymm2, %ymm2 ; AVX256-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2 ; AVX256-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX256-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] @@ -313,10 +313,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX256-NEXT: vpsraw $4, %ymm0, %ymm3 ; AVX256-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX256-NEXT: vpsraw $2, %ymm0, %ymm3 -; AVX256-NEXT: vpaddw %ymm1, %ymm1, %ymm1 -; AVX256-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX256-NEXT: vpaddw %ymm1, %ymm1, %ymm4 +; AVX256-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 ; AVX256-NEXT: vpsraw $1, %ymm0, %ymm3 -; AVX256-NEXT: vpaddw %ymm1, %ymm1, %ymm1 +; AVX256-NEXT: vpsllw $2, %ymm1, %ymm1 ; AVX256-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX256-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX256-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 @@ -338,10 +338,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX512VL-NEXT: vpsraw $4, %ymm3, %ymm4 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsraw $2, %ymm3, %ymm4 -; AVX512VL-NEXT: 
vpaddw %ymm2, %ymm2, %ymm2 -; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 +; AVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm5 +; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsraw $1, %ymm3, %ymm4 -; AVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm2 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] @@ -349,10 +349,10 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX512VL-NEXT: vpsraw $4, %ymm0, %ymm3 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsraw $2, %ymm0, %ymm3 -; AVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm4 +; AVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsraw $1, %ymm0, %ymm3 -; AVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1 +; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 @@ -432,10 +432,10 @@ define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) { ; AVX256VL-NEXT: vpsraw $4, %xmm3, %xmm4 ; AVX256VL-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 ; AVX256VL-NEXT: vpsraw $2, %xmm3, %xmm4 -; AVX256VL-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX256VL-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 +; AVX256VL-NEXT: vpaddw %xmm2, %xmm2, %xmm5 +; AVX256VL-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3 ; AVX256VL-NEXT: vpsraw $1, %xmm3, %xmm4 -; AVX256VL-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX256VL-NEXT: vpsllw $2, %xmm2, %xmm2 ; AVX256VL-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2 ; AVX256VL-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX256VL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -443,10 +443,10 @@ define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) { ; AVX256VL-NEXT: vpsraw $4, %xmm0, %xmm3 ; AVX256VL-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 ; AVX256VL-NEXT: vpsraw $2, %xmm0, %xmm3 -; AVX256VL-NEXT: vpaddw %xmm1, %xmm1, %xmm1 -; AVX256VL-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 +; AVX256VL-NEXT: vpaddw %xmm1, %xmm1, %xmm4 +; AVX256VL-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0 ; AVX256VL-NEXT: vpsraw $1, %xmm0, %xmm3 -; AVX256VL-NEXT: vpaddw %xmm1, %xmm1, %xmm1 +; AVX256VL-NEXT: vpsllw $2, %xmm1, %xmm1 ; AVX256VL-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 ; AVX256VL-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX256VL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll index 9c8729b3..4b8f78d 100644 --- a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll +++ b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll @@ -15,20 +15,20 @@ define <4 x i32> @shuf_rot_v4i32_1032(<4 x i32> %x) { ; ; CHECK-ICX-LABEL: shuf_rot_v4i32_1032: ; CHECK-ICX: # %bb.0: -; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] +; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ICX-NEXT: retq ; ; CHECK-V4-LABEL: shuf_rot_v4i32_1032: ; CHECK-V4: # %bb.0: -; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-V4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] +; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-V4-NEXT: retq ; ; CHECK-ZNVER4-LABEL: shuf_rot_v4i32_1032: ; CHECK-ZNVER4: # %bb.0: -; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ZNVER4-NEXT: 
vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] +; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ZNVER4-NEXT: retq %x1 = add <4 x i32> %x, %x %r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 3, i32 2> @@ -44,20 +44,20 @@ define <8 x i32> @shuf_rot_v8i32_10325476(<8 x i32> %x) { ; ; CHECK-ICX-LABEL: shuf_rot_v8i32_10325476: ; CHECK-ICX: # %bb.0: -; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] +; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ICX-NEXT: retq ; ; CHECK-V4-LABEL: shuf_rot_v8i32_10325476: ; CHECK-V4: # %bb.0: -; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-V4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] +; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-V4-NEXT: retq ; ; CHECK-ZNVER4-LABEL: shuf_rot_v8i32_10325476: ; CHECK-ZNVER4: # %bb.0: -; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] +; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ZNVER4-NEXT: retq %x1 = add <8 x i32> %x, %x %r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> @@ -73,20 +73,20 @@ define <16 x i32> @shuf_rot_v16i32_1032547698111013121514(<16 x i32> %x) { ; ; CHECK-ICX-LABEL: shuf_rot_v16i32_1032547698111013121514: ; CHECK-ICX: # %bb.0: -; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ICX-NEXT: retq ; ; CHECK-V4-LABEL: shuf_rot_v16i32_1032547698111013121514: ; CHECK-V4: # %bb.0: -; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-V4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-V4-NEXT: retq ; ; CHECK-ZNVER4-LABEL: shuf_rot_v16i32_1032547698111013121514: ; CHECK-ZNVER4: # %bb.0: -; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ZNVER4-NEXT: retq %x1 = add <16 x i32> %x, %x %r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> @@ -168,20 +168,20 @@ define <4 x i32> @shuf_shr_v4i32_1U3U(<4 x i32> %x) { ; ; CHECK-ICX-LABEL: shuf_shr_v4i32_1U3U: ; CHECK-ICX: # %bb.0: -; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ICX-NEXT: retq ; ; CHECK-V4-LABEL: shuf_shr_v4i32_1U3U: ; CHECK-V4: # %bb.0: -; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-V4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-V4-NEXT: retq ; ; CHECK-ZNVER4-LABEL: shuf_shr_v4i32_1U3U: ; CHECK-ZNVER4: # %bb.0: -; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ZNVER4-NEXT: retq %x1 = add <4 x i32> %x, %x %r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef> @@ -197,20 +197,20 @@ define <8 x i32> @shuf_shr_v8i32_1U3U5U7U(<8 x i32> %x) { ; ; CHECK-ICX-LABEL: shuf_shr_v8i32_1U3U5U7U: ; CHECK-ICX: # %bb.0: -; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ICX-NEXT: vpshufd 
{{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] +; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ICX-NEXT: retq ; ; CHECK-V4-LABEL: shuf_shr_v8i32_1U3U5U7U: ; CHECK-V4: # %bb.0: -; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-V4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] +; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-V4-NEXT: retq ; ; CHECK-ZNVER4-LABEL: shuf_shr_v8i32_1U3U5U7U: ; CHECK-ZNVER4: # %bb.0: -; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] +; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ZNVER4-NEXT: retq %x1 = add <8 x i32> %x, %x %r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef> @@ -226,20 +226,20 @@ define <16 x i32> @shuf_shr_v16i32_U3U5U7U9U11U13U15(<16 x i32> %x) { ; ; CHECK-ICX-LABEL: shuf_shr_v16i32_U3U5U7U9U11U13U15: ; CHECK-ICX: # %bb.0: -; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ICX-NEXT: retq ; ; CHECK-V4-LABEL: shuf_shr_v16i32_U3U5U7U9U11U13U15: ; CHECK-V4: # %bb.0: -; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-V4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-V4-NEXT: retq ; ; CHECK-ZNVER4-LABEL: shuf_shr_v16i32_U3U5U7U9U11U13U15: ; CHECK-ZNVER4: # %bb.0: -; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] +; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ZNVER4-NEXT: retq %x1 = add <16 x i32> %x, %x %r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 11, i32 undef, i32 13, i32 undef, i32 15, i32 undef> @@ -288,20 +288,20 @@ define <4 x i32> @shuf_shl_v4i32_U0U2(<4 x i32> %x) { ; ; CHECK-ICX-LABEL: shuf_shl_v4i32_U0U2: ; CHECK-ICX: # %bb.0: -; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2] +; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ICX-NEXT: retq ; ; CHECK-V4-LABEL: shuf_shl_v4i32_U0U2: ; CHECK-V4: # %bb.0: -; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-V4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2] +; CHECK-V4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-V4-NEXT: retq ; ; CHECK-ZNVER4-LABEL: shuf_shl_v4i32_U0U2: ; CHECK-ZNVER4: # %bb.0: -; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2] +; CHECK-ZNVER4-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; CHECK-ZNVER4-NEXT: retq %x1 = add <4 x i32> %x, %x %r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32> <i32 undef, i32 0, i32 undef, i32 2> @@ -317,20 +317,20 @@ define <8 x i32> @shuf_shl_v8i32_U0U2U4U6(<8 x i32> %x) { ; ; CHECK-ICX-LABEL: shuf_shl_v8i32_U0U2U4U6: ; CHECK-ICX: # %bb.0: -; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] +; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ICX-NEXT: retq ; ; CHECK-V4-LABEL: shuf_shl_v8i32_U0U2U4U6: ; CHECK-V4: # %bb.0: -; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-V4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] +; CHECK-V4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-V4-NEXT: retq ; ; CHECK-ZNVER4-LABEL: shuf_shl_v8i32_U0U2U4U6: ; CHECK-ZNVER4: # %bb.0: -; 
CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] +; CHECK-ZNVER4-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; CHECK-ZNVER4-NEXT: retq %x1 = add <8 x i32> %x, %x %r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 undef, i32 4, i32 undef, i32 6> @@ -346,20 +346,20 @@ define <16 x i32> @shuf_shl_v16i32_U0U2U4U6U8U10U12U14(<16 x i32> %x) { ; ; CHECK-ICX-LABEL: shuf_shl_v16i32_U0U2U4U6U8U10U12U14: ; CHECK-ICX: # %bb.0: -; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ICX-NEXT: retq ; ; CHECK-V4-LABEL: shuf_shl_v16i32_U0U2U4U6U8U10U12U14: ; CHECK-V4: # %bb.0: -; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-V4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; CHECK-V4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-V4-NEXT: retq ; ; CHECK-ZNVER4-LABEL: shuf_shl_v16i32_U0U2U4U6U8U10U12U14: ; CHECK-ZNVER4: # %bb.0: -; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ZNVER4-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] +; CHECK-ZNVER4-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; CHECK-ZNVER4-NEXT: retq %x1 = add <16 x i32> %x, %x %r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 undef, i32 4, i32 undef, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 undef, i32 12, i32 undef, i32 14> diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll index f91758b..10dee14 100644 --- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll @@ -602,10 +602,10 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; X64-AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3 ; X64-AVX2-NEXT: vpsraw $2, %xmm3, %xmm4 -; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm5 -; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3 +; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm6 +; X64-AVX2-NEXT: vpblendvb %xmm6, %xmm4, %xmm3, %xmm3 ; X64-AVX2-NEXT: vpsraw $1, %xmm3, %xmm4 -; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm5 +; X64-AVX2-NEXT: vpsllw $2, %xmm5, %xmm5 ; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3 ; X64-AVX2-NEXT: vpsrlw $8, %xmm3, %xmm3 ; X64-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -613,10 +613,10 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; X64-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm4 ; X64-AVX2-NEXT: vpsraw $2, %xmm4, %xmm5 -; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm1 -; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm4 +; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm6 +; X64-AVX2-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4 ; X64-AVX2-NEXT: vpsraw $1, %xmm4, %xmm5 -; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm1 +; X64-AVX2-NEXT: vpsllw $2, %xmm1, %xmm1 ; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm1 ; X64-AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1 ; X64-AVX2-NEXT: vpackuswb %xmm3, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index 20be579..9b52857 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -536,14 +536,14 @@ define <8 x i16> 
@var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) ; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm5 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm6 ; AVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4 -; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm4 -; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5 -; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm4 -; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5 -; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm6 +; AVX1-NEXT: vpblendvb %xmm5, %xmm6, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm5 +; AVX1-NEXT: vpsllw $2, %xmm4, %xmm6 +; AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm5 +; AVX1-NEXT: vpsllw $3, %xmm4, %xmm4 +; AVX1-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpslld $23, %xmm3, %xmm3 diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index 1f16463..a387562 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -328,15 +328,15 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm6 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 ; AVX1-NEXT: vpsrlw $8, %xmm7, %xmm8 -; AVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm5, %xmm7 -; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5 -; AVX1-NEXT: vpsrlw $2, %xmm5, %xmm7 -; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6 -; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5 -; AVX1-NEXT: vpsrlw $1, %xmm5, %xmm7 -; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6 -; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5 +; AVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm7 +; AVX1-NEXT: vpsrlw $4, %xmm7, %xmm8 +; AVX1-NEXT: vpblendvb %xmm6, %xmm8, %xmm7, %xmm6 +; AVX1-NEXT: vpsrlw $2, %xmm6, %xmm7 +; AVX1-NEXT: vpsllw $2, %xmm5, %xmm8 +; AVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm6, %xmm6 +; AVX1-NEXT: vpsrlw $1, %xmm6, %xmm7 +; AVX1-NEXT: vpsllw $3, %xmm5, %xmm5 +; AVX1-NEXT: vpblendvb %xmm5, %xmm7, %xmm6, %xmm5 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm6 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm6[4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpslld $23, %xmm4, %xmm7 @@ -358,14 +358,14 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm7 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm8 ; AVX1-NEXT: vpblendvb %xmm6, %xmm8, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm6 -; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm6 -; AVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 -; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm6 -; AVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 -; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm8 +; AVX1-NEXT: vpblendvb %xmm7, %xmm8, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm7 +; AVX1-NEXT: vpsllw $2, %xmm6, %xmm8 +; AVX1-NEXT: vpblendvb %xmm8, %xmm7, %xmm1, %xmm1 +; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm7 +; AVX1-NEXT: vpsllw $3, %xmm6, %xmm6 +; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpslld $23, %xmm3, %xmm3 diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll 
b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll index 02f0f53..d565ef0 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -293,14 +293,14 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2 ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm3 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1 -; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1 -; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1 -; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpsraw $4, %xmm0, %xmm3 +; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpsraw $2, %xmm0, %xmm2 +; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3 +; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2 +; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1 +; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: var_shift_v8i16: @@ -494,10 +494,10 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; AVX-NEXT: vpsraw $4, %xmm3, %xmm4 ; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 ; AVX-NEXT: vpsraw $2, %xmm3, %xmm4 -; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3 +; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm5 +; AVX-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3 ; AVX-NEXT: vpsraw $1, %xmm3, %xmm4 -; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpsllw $2, %xmm2, %xmm2 ; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2 ; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -505,10 +505,10 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; AVX-NEXT: vpsraw $4, %xmm0, %xmm3 ; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 ; AVX-NEXT: vpsraw $2, %xmm0, %xmm3 -; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm4 +; AVX-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0 ; AVX-NEXT: vpsraw $1, %xmm0, %xmm3 -; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpsllw $2, %xmm1, %xmm1 ; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll index 15855e3..249bcba 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -237,29 +237,29 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vpsraw $8, %xmm4, %xmm5 -; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2 -; AVX1-NEXT: vpsraw $4, %xmm2, %xmm4 -; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpsraw $2, %xmm2, %xmm4 -; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpsraw $1, %xmm2, %xmm4 -; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5 +; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3 +; AVX1-NEXT: vpsraw $2, %xmm3, %xmm4 +; AVX1-NEXT: vpsllw $2, %xmm2, %xmm5 +; AVX1-NEXT: 
vpblendvb %xmm5, %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpsraw $1, %xmm3, %xmm4 +; AVX1-NEXT: vpsllw $3, %xmm2, %xmm2 +; AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2 ; AVX1-NEXT: vpsllw $12, %xmm1, %xmm3 ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1 ; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3 ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm4 ; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1 -; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1 -; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1 -; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpsraw $4, %xmm0, %xmm4 +; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpsraw $2, %xmm0, %xmm3 +; AVX1-NEXT: vpsllw $2, %xmm1, %xmm4 +; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpsraw $1, %xmm0, %xmm3 +; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1 +; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -339,29 +339,29 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; X86-AVX1-NEXT: vpsraw $8, %xmm4, %xmm5 -; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2 -; X86-AVX1-NEXT: vpsraw $4, %xmm2, %xmm4 -; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 -; X86-AVX1-NEXT: vpsraw $2, %xmm2, %xmm4 -; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 -; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 -; X86-AVX1-NEXT: vpsraw $1, %xmm2, %xmm4 -; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 -; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2 +; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4 +; X86-AVX1-NEXT: vpsraw $4, %xmm4, %xmm5 +; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3 +; X86-AVX1-NEXT: vpsraw $2, %xmm3, %xmm4 +; X86-AVX1-NEXT: vpsllw $2, %xmm2, %xmm5 +; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3 +; X86-AVX1-NEXT: vpsraw $1, %xmm3, %xmm4 +; X86-AVX1-NEXT: vpsllw $3, %xmm2, %xmm2 +; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2 ; X86-AVX1-NEXT: vpsllw $12, %xmm1, %xmm3 ; X86-AVX1-NEXT: vpsllw $4, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3 ; X86-AVX1-NEXT: vpsraw $8, %xmm0, %xmm4 ; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsraw $4, %xmm0, %xmm1 -; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsraw $2, %xmm0, %xmm1 -; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 -; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsraw $1, %xmm0, %xmm1 -; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 -; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsraw $4, %xmm0, %xmm4 +; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsraw $2, %xmm0, %xmm3 +; X86-AVX1-NEXT: vpsllw $2, %xmm1, %xmm4 +; X86-AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsraw $1, %xmm0, %xmm3 +; X86-AVX1-NEXT: vpsllw $3, %xmm1, %xmm1 +; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; X86-AVX1-NEXT: retl ; @@ -393,10 +393,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; AVX1-NEXT: vpsraw $4, %xmm5, %xmm6 ; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5 ; AVX1-NEXT: 
vpsraw $2, %xmm5, %xmm6
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm7
+; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -404,10 +404,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
@@ -417,10 +417,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -428,10 +428,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vpsraw $4, %xmm0, %xmm4
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm4
-; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm4
-; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
@@ -446,10 +446,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX2-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -457,10 +457,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX2-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX2-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -498,10 +498,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -509,10 +509,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -534,10 +534,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQVL-NEXT: vpsraw $4, %ymm3, %ymm4
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512DQVL-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX512DQVL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512DQVL-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX512DQVL-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; AVX512DQVL-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX512DQVL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; AVX512DQVL-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -545,10 +545,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQVL-NEXT: vpsraw $4, %ymm0, %ymm3
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX512DQVL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX512DQVL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX512DQVL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQVL-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -572,10 +572,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX1-NEXT: vpsraw $4, %xmm5, %xmm6
; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
; X86-AVX1-NEXT: vpsraw $2, %xmm5, %xmm6
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm7
+; X86-AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm5, %xmm5
; X86-AVX1-NEXT: vpsraw $1, %xmm5, %xmm6
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
; X86-AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; X86-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -583,10 +583,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
; X86-AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm6
+; X86-AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; X86-AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsllw $2, %xmm2, %xmm2
; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
; X86-AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; X86-AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
@@ -596,10 +596,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX1-NEXT: vpsraw $4, %xmm4, %xmm5
; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
; X86-AVX1-NEXT: vpsraw $2, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm6
+; X86-AVX1-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; X86-AVX1-NEXT: vpsraw $1, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsllw $2, %xmm3, %xmm3
; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
; X86-AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; X86-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -607,10 +607,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX1-NEXT: vpsraw $4, %xmm0, %xmm4
; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsraw $2, %xmm0, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm5
+; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsraw $1, %xmm0, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpsllw $2, %xmm1, %xmm1
; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; X86-AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
@@ -625,10 +625,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX2-NEXT: vpsraw $4, %ymm3, %ymm4
; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; X86-AVX2-NEXT: vpsraw $2, %ymm3, %ymm4
-; X86-AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; X86-AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; X86-AVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
; X86-AVX2-NEXT: vpsraw $1, %ymm3, %ymm4
-; X86-AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsllw $2, %ymm2, %ymm2
; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; X86-AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
; X86-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -636,10 +636,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX2-NEXT: vpsraw $4, %ymm0, %ymm3
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsraw $2, %ymm0, %ymm3
-; X86-AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; X86-AVX2-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsraw $1, %ymm0, %ymm3
-; X86-AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; X86-AVX2-NEXT: vpsllw $2, %ymm1, %ymm1
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; X86-AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index ea0745b..0fb0420 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -59,10 +59,10 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm6
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
; AVX512DQ-NEXT: vpsraw $2, %ymm5, %ymm6
-; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm7
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm6, %ymm5, %ymm5
; AVX512DQ-NEXT: vpsraw $1, %ymm5, %ymm6
-; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsllw $2, %ymm3, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm6, %ymm5, %ymm3
; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -70,10 +70,10 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpsraw $2, %ymm4, %ymm5
-; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpsraw $1, %ymm4, %ymm5
-; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm2
; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm2, %ymm2
@@ -83,10 +83,10 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpsraw $2, %ymm4, %ymm5
-; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpsraw $1, %ymm4, %ymm5
-; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsllw $2, %ymm3, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
@@ -94,10 +94,10 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm4
-; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm5, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm4
-; AVX512DQ-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
@@ -113,11 +113,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3
-; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
-; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm5
+; AVX512BW-NEXT: vpmovb2m %zmm5, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3
-; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; AVX512BW-NEXT: vpsllw $2, %zmm4, %zmm4
; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
@@ -127,11 +127,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm4
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsllw $2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index f7de8d4..c5d3297 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -196,14 +196,14 @@ define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v4i16:
@@ -367,14 +367,14 @@ define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v2i16:
@@ -568,10 +568,10 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm3, %xmm4
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; AVX-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -579,10 +579,10 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $2, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; AVX-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $1, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
@@ -796,10 +796,10 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm3, %xmm4
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; AVX-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -807,10 +807,10 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $2, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; AVX-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $1, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
@@ -1024,10 +1024,10 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm3, %xmm4
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; AVX-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsllw $2, %xmm2, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -1035,10 +1035,10 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
; AVX-NEXT: vpsraw $4, %xmm0, %xmm3
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $2, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; AVX-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsraw $1, %xmm0, %xmm3
-; AVX-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index 1d1697a..8cb2c7b 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -262,14 +262,14 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 3a4bb22..606adb4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -198,29 +198,29 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm5
-; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm4
-; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm4
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm4
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpsrlw $4, %xmm4, %xmm5
+; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $2, %xmm3, %xmm4
+; AVX1-NEXT: vpsllw $2, %xmm2, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm4
+; AVX1-NEXT: vpsllw $3, %xmm2, %xmm2
+; AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; AVX1-NEXT: vpsllw $12, %xmm1, %xmm3
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4
; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
+; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -300,29 +300,29 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; X86-AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpsrlw $8, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
-; X86-AVX1-NEXT: vpsrlw $4, %xmm2, %xmm4
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpsrlw $2, %xmm2, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpsrlw $1, %xmm2, %xmm4
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpsrlw $4, %xmm4, %xmm5
+; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
+; X86-AVX1-NEXT: vpsrlw $2, %xmm3, %xmm4
+; X86-AVX1-NEXT: vpsllw $2, %xmm2, %xmm5
+; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsrlw $1, %xmm3, %xmm4
+; X86-AVX1-NEXT: vpsllw $3, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
; X86-AVX1-NEXT: vpsllw $12, %xmm1, %xmm3
; X86-AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
; X86-AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4
; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
+; X86-AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3
+; X86-AVX1-NEXT: vpsllw $2, %xmm1, %xmm4
+; X86-AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
+; X86-AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
index 7928111..57874c4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -196,14 +196,14 @@ define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v4i16:
@@ -367,14 +367,14 @@ define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v2i16:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index dbbfaab..be41945 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -8079,14 +8079,14 @@ define <16 x i16> @pr43230(<16 x i16> %a, <16 x i16> %b) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq