Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--  llvm/test/CodeGen/X86/avx-shift.ll | 2
-rw-r--r--  llvm/test/CodeGen/X86/avx2-arith.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/combine-mul.ll | 22
-rw-r--r--  llvm/test/CodeGen/X86/combine-multiplies.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/combine-pmuldq.ll | 24
-rw-r--r--  llvm/test/CodeGen/X86/combine-rotates.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/combine-sdiv.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/combine-shl.ll | 54
-rw-r--r--  llvm/test/CodeGen/X86/combine-srem.ll | 10
-rw-r--r--  llvm/test/CodeGen/X86/combine-udiv.ll | 10
-rw-r--r--  llvm/test/CodeGen/X86/combine-umax.ll | 2
-rw-r--r--  llvm/test/CodeGen/X86/combine-umin.ll | 2
-rw-r--r--  llvm/test/CodeGen/X86/combine-urem.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/dagcombine-shifts.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/funnel-shift.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/known-pow2.ll | 6
-rw-r--r--  llvm/test/CodeGen/X86/madd.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll | 16
-rw-r--r--  llvm/test/CodeGen/X86/pmul.ll | 2
-rw-r--r--  llvm/test/CodeGen/X86/pr162812.ll | 50
-rw-r--r--  llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll | 2
-rw-r--r--  llvm/test/CodeGen/X86/rotate-extract-vector.ll | 38
-rw-r--r--  llvm/test/CodeGen/X86/sdiv-exact.ll | 18
-rw-r--r--  llvm/test/CodeGen/X86/shrink_vmul.ll | 20
-rw-r--r--  llvm/test/CodeGen/X86/slow-pmulld.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll | 388
-rw-r--r--  llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll | 24
-rw-r--r--  llvm/test/CodeGen/X86/udiv-exact.ll | 18
-rw-r--r--  llvm/test/CodeGen/X86/undo-mul-and.ll | 18
-rw-r--r--  llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll | 470
-rw-r--r--  llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll | 20
-rw-r--r--  llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll | 36
-rw-r--r--  llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll | 20
-rw-r--r--  llvm/test/CodeGen/X86/var-permute-128.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/vec_reassociate.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/vector-compress.ll | 70
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-128.ll | 12
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-256.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 16
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll | 16
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-sub128.ll | 12
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-128.ll | 12
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-256.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 16
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll | 16
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 36
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll | 32
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll | 32
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll | 56
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll | 56
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll | 56
-rw-r--r--  llvm/test/CodeGen/X86/vector-mul.ll | 100
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-128.ll | 16
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-256.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 12
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/vector-trunc-math.ll | 92
-rw-r--r--  llvm/test/CodeGen/X86/vselect-avx.ll | 18
-rw-r--r--  llvm/test/CodeGen/X86/vselect-pcmp.ll | 6
-rw-r--r--  llvm/test/CodeGen/X86/zero-call-used-regs-simd.ll | 216
65 files changed, 1206 insertions, 1096 deletions
diff --git a/llvm/test/CodeGen/X86/avx-shift.ll b/llvm/test/CodeGen/X86/avx-shift.ll
index c9c09d7..3bce843 100644
--- a/llvm/test/CodeGen/X86/avx-shift.ll
+++ b/llvm/test/CodeGen/X86/avx-shift.ll
@@ -201,7 +201,7 @@ define <8 x i32> @vshift08_add(<8 x i32> %a, <8 x i32> %y) {
define <4 x i32> @vshift13(<4 x i32> %in) {
; CHECK-LABEL: vshift13:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,16]
; CHECK-NEXT: retq
%T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
ret <4 x i32> %T
diff --git a/llvm/test/CodeGen/X86/avx2-arith.ll b/llvm/test/CodeGen/X86/avx2-arith.ll
index 70b3b99..1133cdfd 100644
--- a/llvm/test/CodeGen/X86/avx2-arith.ll
+++ b/llvm/test/CodeGen/X86/avx2-arith.ll
@@ -199,12 +199,12 @@ define <8 x i32> @mul_const5(<8 x i32> %x) {
define <8 x i32> @mul_const6(<8 x i32> %x) {
; X86-LABEL: mul_const6:
; X86: # %bb.0:
-; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [0,0,0,2,0,2,0,0]
; X86-NEXT: retl
;
; X64-LABEL: mul_const6:
; X64: # %bb.0:
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,0,0,2,0,2,0,0]
; X64-NEXT: retq
%y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
ret <8 x i32> %y
diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll
index ae4d24f..29c41ca 100644
--- a/llvm/test/CodeGen/X86/combine-mul.ll
+++ b/llvm/test/CodeGen/X86/combine-mul.ll
@@ -66,7 +66,7 @@ define <4 x i32> @combine_vec_mul_pow2a(<4 x i32> %x) {
define <4 x i32> @combine_vec_mul_pow2b(<4 x i32> %x) {
; SSE-LABEL: combine_vec_mul_pow2b:
; SSE: # %bb.0:
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,16]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_mul_pow2b:
@@ -120,12 +120,12 @@ define <4 x i32> @combine_vec_mul_negpow2a(<4 x i32> %x) {
define <4 x i32> @combine_vec_mul_negpow2b(<4 x i32> %x) {
; SSE-LABEL: combine_vec_mul_negpow2b:
; SSE: # %bb.0:
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967294,4294967292,4294967280]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_mul_negpow2b:
; AVX: # %bb.0:
-; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967294,4294967292,4294967280]
; AVX-NEXT: retq
%1 = mul <4 x i32> %x, <i32 -1, i32 -2, i32 -4, i32 -16>
ret <4 x i32> %1
@@ -176,12 +176,12 @@ define <4 x i64> @combine_vec_mul_negpow2c(<4 x i64> %x) {
define <4 x i32> @combine_vec_mul_shl_const(<4 x i32> %x) {
; SSE-LABEL: combine_vec_mul_shl_const:
; SSE: # %bb.0:
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,12,1280,458752]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_mul_shl_const:
; AVX: # %bb.0:
-; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2,12,1280,458752]
; AVX-NEXT: retq
%1 = shl <4 x i32> %x, <i32 1, i32 2, i32 8, i32 16>
%2 = mul <4 x i32> %1, <i32 1, i32 3, i32 5, i32 7>
@@ -193,7 +193,7 @@ define <4 x i32> @combine_vec_mul_shl_oneuse0(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_mul_shl_oneuse0:
; SSE: # %bb.0:
; SSE-NEXT: pmulld %xmm1, %xmm0
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_mul_shl_oneuse0:
@@ -210,7 +210,7 @@ define <4 x i32> @combine_vec_mul_shl_oneuse1(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_mul_shl_oneuse1:
; SSE: # %bb.0:
; SSE-NEXT: pmulld %xmm1, %xmm0
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_mul_shl_oneuse1:
@@ -226,7 +226,7 @@ define <4 x i32> @combine_vec_mul_shl_oneuse1(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @combine_vec_mul_shl_multiuse0(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_mul_shl_multiuse0:
; SSE: # %bb.0:
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536]
; SSE-NEXT: pmulld %xmm0, %xmm1
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -246,7 +246,7 @@ define <4 x i32> @combine_vec_mul_shl_multiuse0(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @combine_vec_mul_shl_multiuse1(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_mul_shl_multiuse1:
; SSE: # %bb.0:
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,256,65536]
; SSE-NEXT: pmulld %xmm0, %xmm1
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -268,13 +268,13 @@ define <4 x i32> @combine_vec_mul_shl_multiuse1(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @combine_vec_mul_add(<4 x i32> %x) {
; SSE-LABEL: combine_vec_mul_add:
; SSE: # %bb.0:
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,6,2,0]
; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_mul_add:
; AVX: # %bb.0:
-; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,6,2,0]
; AVX-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%1 = add <4 x i32> %x, <i32 1, i32 2, i32 8, i32 16>
diff --git a/llvm/test/CodeGen/X86/combine-multiplies.ll b/llvm/test/CodeGen/X86/combine-multiplies.ll
index a5d9846..4bdf20d 100644
--- a/llvm/test/CodeGen/X86/combine-multiplies.ll
+++ b/llvm/test/CodeGen/X86/combine-multiplies.ll
@@ -142,9 +142,9 @@ define void @testCombineMultiplies_non_splat(<4 x i32> %v1) nounwind {
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [11,22,33,44]
; CHECK-NEXT: paddd %xmm0, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [22,33,44,55]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [33,u,55,u]
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [242,726,1452,2420]
diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll
index 70335f8..ff5329c 100644
--- a/llvm/test/CodeGen/X86/combine-pmuldq.ll
+++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll
@@ -204,16 +204,16 @@ define i32 @PR43159(ptr %a0) {
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [344322273,344322273,1916962805,1916962805]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $1, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,2147483648,2147483648]
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: psrld $7, %xmm0
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1645975491,344322273,2164392969,1916962805]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE-NEXT: psrld $6, %xmm1
; SSE-NEXT: movd %xmm1, %edi
@@ -226,15 +226,15 @@ define i32 @PR43159(ptr %a0) {
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [344322273,344322273,1916962805,1916962805]
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsrld $7, %xmm1, %xmm1
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1645975491,344322273,2164392969,1916962805]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vpsrld $6, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %edi
@@ -247,9 +247,9 @@ define i32 @PR43159(ptr %a0) {
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [344322273,u,1916962805,u]
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1645975491,344322273,2164392969,1916962805]
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
@@ -270,9 +270,9 @@ define i32 @PR43159(ptr %a0) {
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [344322273,u,1916962805,u]
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512VL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1645975491,344322273,2164392969,1916962805]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
@@ -293,9 +293,9 @@ define i32 @PR43159(ptr %a0) {
; AVX512DQVL: # %bb.0: # %entry
; AVX512DQVL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [344322273,u,1916962805,u]
; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1645975491,344322273,2164392969,1916962805]
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; AVX512DQVL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll
index 65d74c8..e7152ec 100644
--- a/llvm/test/CodeGen/X86/combine-rotates.ll
+++ b/llvm/test/CodeGen/X86/combine-rotates.ll
@@ -10,9 +10,9 @@ define <4 x i32> @combine_vec_rot_rot(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_rot_rot:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [524288,131072,32768,8192]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [131072,u,8192,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 6bcbfe1..f7baee9 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -2927,7 +2927,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,0,0,0,0,0,0,37632]
+; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
; SSE2-NEXT: psrlw $8, %xmm3
; SSE2-NEXT: packuswb %xmm3, %xmm1
; SSE2-NEXT: paddb %xmm1, %xmm0
@@ -2947,7 +2947,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,0,37632]
+; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: packuswb %xmm2, %xmm1
; SSE41-NEXT: paddb %xmm0, %xmm1
@@ -2971,7 +2971,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm1
@@ -3044,7 +3044,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
; XOP: # %bb.0:
; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
; XOP-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,37632]
+; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147]
; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15],xmm2[1,3,5,7,9,11,13,15]
; XOP-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index 1ce10c37..9548967 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -88,7 +88,7 @@ define <4 x i32> @combine_vec_shl_known_zero1(<4 x i32> %x) {
; SSE2-NEXT: pmuludq %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,u,8192,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movdqa %xmm1, %xmm0
@@ -97,7 +97,7 @@ define <4 x i32> @combine_vec_shl_known_zero1(<4 x i32> %x) {
; SSE41-LABEL: combine_vec_shl_known_zero1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65536,32768,16384,8192]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_known_zero1:
@@ -198,16 +198,16 @@ define <4 x i32> @combine_vec_shl_shl1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_shl1:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,64,256,1024]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [64,u,1024,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_shl1:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,64,256,1024]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_shl1:
@@ -304,17 +304,17 @@ define <8 x i32> @combine_vec_shl_ext_shl2(<8 x i16> %x) {
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [131072,524288,2097152,8388608]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [524288,u,8388608,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [33554432,134217728,536870912,2147483648]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [134217728,u,2147483648,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movdqa %xmm2, %xmm0
@@ -323,10 +323,10 @@ define <8 x i32> @combine_vec_shl_ext_shl2(<8 x i16> %x) {
; SSE41-LABEL: combine_vec_shl_ext_shl2:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd %xmm0, %xmm2
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [131072,524288,2097152,8388608]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [33554432,134217728,536870912,2147483648]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
@@ -673,9 +673,9 @@ define <4 x i32> @combine_vec_shl_add1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_add1:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4,u,16,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -683,7 +683,7 @@ define <4 x i32> @combine_vec_shl_add1(<4 x i32> %x) {
;
; SSE41-LABEL: combine_vec_shl_add1:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16]
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
@@ -726,9 +726,9 @@ define <4 x i32> @combine_vec_shl_or1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_or1:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4,u,16,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -736,7 +736,7 @@ define <4 x i32> @combine_vec_shl_or1(<4 x i32> %x) {
;
; SSE41-LABEL: combine_vec_shl_or1:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16]
; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
@@ -765,7 +765,7 @@ define <4 x i32> @combine_vec_shl_mul0(<4 x i32> %x) {
;
; SSE41-LABEL: combine_vec_shl_mul0:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [20,20,20,20]
; SSE41-NEXT: retq
;
; AVX2-LABEL: combine_vec_shl_mul0:
@@ -787,21 +787,21 @@ define <4 x i32> @combine_vec_shl_mul1(<4 x i32> %x) {
; SSE2-LABEL: combine_vec_shl_mul1:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [10,24,56,128]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [24,u,128,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_shl_mul1:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [10,24,56,128]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_mul1:
; AVX: # %bb.0:
-; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [10,24,56,128]
; AVX-NEXT: retq
%1 = mul <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
%2 = shl <4 x i32> %1, <i32 1, i32 2, i32 3, i32 4>
@@ -813,9 +813,9 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) {
; SSE2-LABEL: combine_vec_add_shl_nonsplat:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,8,16,32]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [8,u,32,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -823,7 +823,7 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) {
;
; SSE41-LABEL: combine_vec_add_shl_nonsplat:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,8,16,32]
; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
@@ -852,7 +852,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
; SSE2-NEXT: pmuludq %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [8,u,32,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -863,7 +863,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,8,16,32]
; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll
index 4b01c16..0ca79ad 100644
--- a/llvm/test/CodeGen/X86/combine-srem.ll
+++ b/llvm/test/CodeGen/X86/combine-srem.ll
@@ -272,7 +272,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
; SSE-NEXT: psrad $2, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1,2,4,8]
; SSE-NEXT: psubd %xmm2, %xmm0
; SSE-NEXT: retq
;
@@ -291,7 +291,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
; AVX1-NEXT: vpsrad $2, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,2,4,8]
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -336,7 +336,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
; SSE-NEXT: psrld $1, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967294,4294967292,4294967288,4294967280]
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -358,7 +358,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [4294967294,4294967292,4294967288,4294967280]
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -368,7 +368,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [4294967294,4294967292,4294967288,4294967280]
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%1 = srem <4 x i32> %x, <i32 -2, i32 -4, i32 -8, i32 -16>
diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll
index c90344b8..233735d 100644
--- a/llvm/test/CodeGen/X86/combine-udiv.ll
+++ b/llvm/test/CodeGen/X86/combine-udiv.ll
@@ -665,14 +665,12 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
;
; XOP-LABEL: combine_vec_udiv_nonuniform4:
; XOP: # %bb.0:
-; XOP-NEXT: movl $171, %eax
+; XOP-NEXT: movl $249, %eax
; XOP-NEXT: vmovd %eax, %xmm1
; XOP-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; XOP-NEXT: vpmullw %xmm1, %xmm2, %xmm1
-; XOP-NEXT: vpsrlw $8, %xmm1, %xmm1
-; XOP-NEXT: movl $249, %eax
-; XOP-NEXT: vmovd %eax, %xmm2
-; XOP-NEXT: vpshlb %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [171,0,0,0]
+; XOP-NEXT: vpsrlw $8, %xmm2, %xmm2
+; XOP-NEXT: vpshlb %xmm1, %xmm2, %xmm1
; XOP-NEXT: vpmovsxwq {{.*#+}} xmm2 = [18446744073709551360,18446744073709551615]
; XOP-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/combine-umax.ll b/llvm/test/CodeGen/X86/combine-umax.ll
index 25f8ec8..482b4fc 100644
--- a/llvm/test/CodeGen/X86/combine-umax.ll
+++ b/llvm/test/CodeGen/X86/combine-umax.ll
@@ -60,7 +60,7 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_demandedbits:
; SSE2: # %bb.0:
-; SSE2-NEXT: pmaxub %xmm1, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll
index 76dbcb5..e2757d0 100644
--- a/llvm/test/CodeGen/X86/combine-umin.ll
+++ b/llvm/test/CodeGen/X86/combine-umin.ll
@@ -77,7 +77,7 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_demandedbits:
; SSE2: # %bb.0:
-; SSE2-NEXT: pminub %xmm1, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/combine-urem.ll b/llvm/test/CodeGen/X86/combine-urem.ll
index 715d5c7..34c7d3d 100644
--- a/llvm/test/CodeGen/X86/combine-urem.ll
+++ b/llvm/test/CodeGen/X86/combine-urem.ll
@@ -327,7 +327,7 @@ define <4 x i32> @combine_vec_urem_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) {
; SSE-NEXT: pslld $23, %xmm1
; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,4,8,16]
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: paddd %xmm1, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
@@ -338,7 +338,7 @@ define <4 x i32> @combine_vec_urem_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) {
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,4,8,16]
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/dagcombine-shifts.ll b/llvm/test/CodeGen/X86/dagcombine-shifts.ll
index 345b2b9..19b9452 100644
--- a/llvm/test/CodeGen/X86/dagcombine-shifts.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-shifts.ll
@@ -437,9 +437,9 @@ define <4 x i32> @shift_zext_shl2_vec(<4 x i8> %x) nounwind {
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [512,256,128,64]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,u,64,u]
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index df97f49..252cb33 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -574,9 +574,9 @@ define <4 x i32> @fshl_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshl_v4i32_undef1_cst:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [512,1024,2048,4096]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1024,u,4096,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: retl
@@ -746,9 +746,9 @@ define <4 x i32> @fshr_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshr_v4i32_undef1_cst:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [8388608,4194304,2097152,1048576]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [4194304,u,1048576,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 1a2aac6..b45d01e 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -499,9 +499,9 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [0,1,16776960,2147483648]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [1,u,2147483648,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT: pand %xmm1, %xmm0
@@ -524,9 +524,9 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,1,16776960,2147483648]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1,u,2147483648,u]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE2-NEXT: pand %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index e183bbc..019bca7 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -28,16 +28,16 @@ define <4 x i32> @pow2_non_splat_vec_fail0(<4 x i32> %x) {
; CHECK-NEXT: pmuludq %xmm0, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1073741824,u,67108864,u]
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; CHECK-NEXT: movdqa %xmm1, %xmm3
; CHECK-NEXT: psrld $1, %xmm3
; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,3]
-; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [9,4,16,64]
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [4,u,64,u]
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-NEXT: psubd %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll
index 4ec54d8..2a2a4a5 100644
--- a/llvm/test/CodeGen/X86/madd.ll
+++ b/llvm/test/CodeGen/X86/madd.ll
@@ -2057,10 +2057,10 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) {
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294934528,0,0,0]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,0,7,0,42,0,32,0]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [32768,4294934528,0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSE2-NEXT: paddd %xmm2, %xmm1
@@ -2072,14 +2072,14 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) {
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,7,42,32]
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32768,4294934528,0,0]
; AVX1-NEXT: vphaddd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX256-LABEL: pmaddwd_negative2:
; AVX256: # %bb.0:
; AVX256-NEXT: vpmovsxwd %xmm0, %ymm0
-; AVX256-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX256-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,4294934528,0,0,1,7,42,32]
; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX256-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
index 693d199..9729fd7 100644
--- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
+++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
@@ -100,7 +100,7 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; SSE4-LABEL: p4_vector_urem_by_const__splat:
; SSE4: # %bb.0:
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; SSE4-NEXT: psrld $1, %xmm0
; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
; SSE4-NEXT: pcmpgtd %xmm0, %xmm1
@@ -128,10 +128,10 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,u,954437177,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,3435973837,2863311531,954437177]
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,2147483648,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: psrlq $32, %xmm0
@@ -145,7 +145,7 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
; SSE4-LABEL: p5_vector_urem_by_const__nonsplat:
; SSE4: # %bb.0:
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,3435973837,2863311531,954437177]
; SSE4-NEXT: pmovzxdq {{.*#+}} xmm1 = [1,2147483648]
; SSE4-NEXT: pmuludq %xmm0, %xmm1
; SSE4-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -159,7 +159,7 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
; AVX2-LABEL: p5_vector_urem_by_const__nonsplat:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,3435973837,2863311531,954437177]
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -196,7 +196,7 @@ define <4 x i1> @p6_vector_urem_by_const__nonsplat_undef0(<4 x i32> %x, <4 x i32
; SSE4-LABEL: p6_vector_urem_by_const__nonsplat_undef0:
; SSE4: # %bb.0:
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; SSE4-NEXT: movdqa %xmm0, %xmm1
; SSE4-NEXT: psrld $1, %xmm1
; SSE4-NEXT: pslld $31, %xmm0
@@ -312,7 +312,7 @@ define <4 x i1> @p8_vector_urem_by_const__nonsplat_undef3(<4 x i32> %x, <4 x i32
; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE4-NEXT: psrld $2, %xmm2
-; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,6,6,6]
; SSE4-NEXT: psubd %xmm2, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pcmpeqd %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll
index 9aee2f1..00731fe 100644
--- a/llvm/test/CodeGen/X86/pmul.ll
+++ b/llvm/test/CodeGen/X86/pmul.ll
@@ -91,7 +91,7 @@ define <4 x i32> @mul_v4i32c(<4 x i32> %i) nounwind {
;
; SSE41-LABEL: mul_v4i32c:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [117,117,117,117]
; SSE41-NEXT: retq
;
; AVX-LABEL: mul_v4i32c:
diff --git a/llvm/test/CodeGen/X86/pr162812.ll b/llvm/test/CodeGen/X86/pr162812.ll
index 4ea3101..cec093c 100644
--- a/llvm/test/CodeGen/X86/pr162812.ll
+++ b/llvm/test/CodeGen/X86/pr162812.ll
@@ -34,61 +34,43 @@ define <32 x i8> @PR162812(<32 x i8> %a, <32 x i8> %mask) {
;
; SSE42-LABEL: PR162812:
; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm2, %xmm5
-; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm4
+; SSE42-NEXT: psrlw $2, %xmm2
+; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [8224,8224,8224,8224,8224,8224,8224,8224]
+; SSE42-NEXT: pand %xmm5, %xmm2
+; SSE42-NEXT: paddb %xmm2, %xmm2
+; SSE42-NEXT: paddb %xmm2, %xmm2
; SSE42-NEXT: movdqa %xmm0, %xmm6
-; SSE42-NEXT: psllw $2, %xmm6
-; SSE42-NEXT: movdqa {{.*#+}} xmm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; SSE42-NEXT: pand %xmm7, %xmm6
-; SSE42-NEXT: psrlw $2, %xmm5
-; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [8224,8224,8224,8224,8224,8224,8224,8224]
-; SSE42-NEXT: pand %xmm4, %xmm5
+; SSE42-NEXT: paddb %xmm0, %xmm6
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm4
+; SSE42-NEXT: psrlw $2, %xmm3
+; SSE42-NEXT: pand %xmm3, %xmm5
; SSE42-NEXT: paddb %xmm5, %xmm5
-; SSE42-NEXT: movdqa %xmm5, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm2
-; SSE42-NEXT: movdqa %xmm2, %xmm6
-; SSE42-NEXT: paddb %xmm2, %xmm6
; SSE42-NEXT: paddb %xmm5, %xmm5
+; SSE42-NEXT: movdqa %xmm1, %xmm2
+; SSE42-NEXT: paddb %xmm1, %xmm2
; SSE42-NEXT: movdqa %xmm5, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm6, %xmm2
-; SSE42-NEXT: movdqa %xmm1, %xmm5
-; SSE42-NEXT: psllw $2, %xmm5
-; SSE42-NEXT: pand %xmm7, %xmm5
-; SSE42-NEXT: psrlw $2, %xmm3
-; SSE42-NEXT: pand %xmm3, %xmm4
-; SSE42-NEXT: paddb %xmm4, %xmm4
-; SSE42-NEXT: movdqa %xmm4, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm5, %xmm1
-; SSE42-NEXT: movdqa %xmm1, %xmm3
-; SSE42-NEXT: paddb %xmm1, %xmm3
-; SSE42-NEXT: paddb %xmm4, %xmm4
+; SSE42-NEXT: pblendvb %xmm0, %xmm2, %xmm1
; SSE42-NEXT: movdqa %xmm4, %xmm0
-; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm1
-; SSE42-NEXT: movdqa %xmm2, %xmm0
; SSE42-NEXT: retq
;
; AVX2-LABEL: PR162812:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX2-NEXT: vpsrlw $2, %ymm1, %ymm1
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: PR162812:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512-NEXT: vpsrlw $2, %ymm1, %ymm1
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
index 885b075..59b03f8 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
@@ -9,7 +9,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) {
; AVX256BW: # %bb.0:
; AVX256BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX256BW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX256BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX256BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX256BW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX256BW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX256BW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
diff --git a/llvm/test/CodeGen/X86/rotate-extract-vector.ll b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
index 1ead3f9..7d0ec64 100644
--- a/llvm/test/CodeGen/X86/rotate-extract-vector.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
@@ -149,19 +149,12 @@ define <32 x i16> @illegal_no_extract_mul(<32 x i16> %i) nounwind {
; Result would undershift
define <4 x i64> @no_extract_shl(<4 x i64> %i) nounwind {
-; X86-LABEL: no_extract_shl:
-; X86: # %bb.0:
-; X86-NEXT: vpsllq $24, %ymm0, %ymm1
-; X86-NEXT: vpsrlq $39, %ymm0, %ymm0
-; X86-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm1, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: no_extract_shl:
-; X64: # %bb.0:
-; X64-NEXT: vpsllq $24, %ymm0, %ymm1
-; X64-NEXT: vpsrlq $39, %ymm0, %ymm0
-; X64-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: no_extract_shl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllq $24, %ymm0, %ymm1
+; CHECK-NEXT: vpsrlq $39, %ymm0, %ymm0
+; CHECK-NEXT: vpternlogq {{.*#+}} ymm0 = (ymm0 & m64bcst) | ymm1
+; CHECK-NEXT: ret{{[l|q]}}
%lhs_mul = shl <4 x i64> %i, <i64 11, i64 11, i64 11, i64 11>
%rhs_mul = shl <4 x i64> %i, <i64 24, i64 24, i64 24, i64 24>
%lhs_shift = lshr <4 x i64> %lhs_mul, <i64 50, i64 50, i64 50, i64 50>
@@ -171,19 +164,12 @@ define <4 x i64> @no_extract_shl(<4 x i64> %i) nounwind {
; Result would overshift
define <4 x i32> @no_extract_shrl(<4 x i32> %i) nounwind {
-; X86-LABEL: no_extract_shrl:
-; X86: # %bb.0:
-; X86-NEXT: vpsrld $9, %xmm0, %xmm1
-; X86-NEXT: vpslld $25, %xmm0, %xmm0
-; X86-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm1, %xmm0
-; X86-NEXT: retl
-;
-; X64-LABEL: no_extract_shrl:
-; X64: # %bb.0:
-; X64-NEXT: vpsrld $9, %xmm0, %xmm1
-; X64-NEXT: vpslld $25, %xmm0, %xmm0
-; X64-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
-; X64-NEXT: retq
+; CHECK-LABEL: no_extract_shrl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsrld $9, %xmm0, %xmm1
+; CHECK-NEXT: vpslld $25, %xmm0, %xmm0
+; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = (xmm0 & m32bcst) | xmm1
+; CHECK-NEXT: ret{{[l|q]}}
%lhs_div = lshr <4 x i32> %i, <i32 3, i32 3, i32 3, i32 3>
%rhs_div = lshr <4 x i32> %i, <i32 9, i32 9, i32 9, i32 9>
%lhs_shift = shl <4 x i32> %lhs_div, <i32 28, i32 28, i32 28, i32 28>
diff --git a/llvm/test/CodeGen/X86/sdiv-exact.ll b/llvm/test/CodeGen/X86/sdiv-exact.ll
index 4568191..7873ffa 100644
--- a/llvm/test/CodeGen/X86/sdiv-exact.ll
+++ b/llvm/test/CodeGen/X86/sdiv-exact.ll
@@ -87,7 +87,7 @@ define <4 x i32> @test5(<4 x i32> %x) {
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [2863311531,u,3264175145,u]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
@@ -95,7 +95,7 @@ define <4 x i32> @test5(<4 x i32> %x) {
; X64-LABEL: test5:
; X64: # %bb.0:
; X64-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,3264175145,3264175145]
; X64-NEXT: retq
%div = sdiv exact <4 x i32> %x, <i32 24, i32 24, i32 25, i32 25>
ret <4 x i32> %div
@@ -112,7 +112,7 @@ define <4 x i32> @test6(<4 x i32> %x) {
; X86-NEXT: pmuludq %xmm0, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [2863311531,u,3303820997,u]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movdqa %xmm1, %xmm0
@@ -121,7 +121,7 @@ define <4 x i32> @test6(<4 x i32> %x) {
; X64-LABEL: test6:
; X64: # %bb.0:
; X64-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,3303820997,3303820997]
; X64-NEXT: retq
%div = sdiv exact <4 x i32> %x, <i32 24, i32 24, i32 26, i32 26>
ret <4 x i32> %div
@@ -131,16 +131,16 @@ define <4 x i32> @test7(<4 x i32> %x) {
; X86-LABEL: test7:
; X86: # %bb.0:
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [3264175145,3264175145,1749801491,1749801491]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [3264175145,u,1749801491,u]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
;
; X64-LABEL: test7:
; X64: # %bb.0:
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,1749801491,1749801491]
; X64-NEXT: retq
%div = sdiv exact <4 x i32> %x, <i32 25, i32 25, i32 27, i32 27>
ret <4 x i32> %div
@@ -156,7 +156,7 @@ define <4 x i32> @test8(<4 x i32> %x) {
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1,u,2863311531,u]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
@@ -164,7 +164,7 @@ define <4 x i32> @test8(<4 x i32> %x) {
; X64-LABEL: test8:
; X64: # %bb.0:
; X64-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,2863311531,2863311531]
; X64-NEXT: retq
%div = sdiv exact <4 x i32> %x, <i32 1, i32 1, i32 24, i32 24>
ret <4 x i32> %div
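The constants decoded in the sdiv-exact.ll annotations above are multiplicative inverses modulo 2^32: an `sdiv exact` by a constant can be lowered to an arithmetic shift by the divisor's power-of-two factor followed by a multiply by the inverse of its odd factor. Here 24 = 8 * 3 with inv(3) = 2863311531, and inv(25) = 3264175145, inv(13) = 3303820997, inv(27) = 1749801491 for the other lanes. A minimal C sketch of the idea (an illustration, not LLVM's implementation):

#include <stdint.h>

/* Multiplicative inverse of an odd d modulo 2^32 via Newton-Raphson:
   x0 = d is correct to 3 bits, and each step doubles the precision,
   so four steps reach 48 >= 32 bits. */
static uint32_t modinv_pow2_32(uint32_t d) {
    uint32_t x = d;
    for (int i = 0; i < 4; i++)
        x *= 2 - d * x;
    return x;
}

/* x is assumed to be an exact multiple of 24 (= 8 * 3);
   the shift is arithmetic and exact because x is a multiple of 8. */
static int32_t sdiv_exact_24(int32_t x) {
    return (int32_t)((uint32_t)(x >> 3) * modinv_pow2_32(3)); /* inv(3) = 2863311531 */
}

Since vpsravd shifts lanewise, divisors whose power-of-two factors differ (as in test5's <24,24,25,25>) still lower to one variable shift plus one vpmulld.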
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index e53eed4..504a392 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -1760,7 +1760,7 @@ define void @mul_2xi16_varconst1(ptr nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl c, %edx
; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [0,65535,u,u]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1781,7 +1781,7 @@ define void @mul_2xi16_varconst1(ptr nocapture readonly %a, i64 %index) {
; X64-AVX-NEXT: movq c(%rip), %rax
; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,65535,u,u]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1864,7 +1864,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movl c, %edx
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [65536,65536,65536,65536]
; X86-SSE-NEXT: psllq $32, %xmm0
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
@@ -1876,7 +1876,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl c, %edx
; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [0,65536,u,u]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1885,7 +1885,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: movq c(%rip), %rax
; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65536,65536,65536,65536]
; X64-SSE-NEXT: psllq $32, %xmm0
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
@@ -1895,7 +1895,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) {
; X64-AVX-NEXT: movq c(%rip), %rax
; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,65536,u,u]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
@@ -1922,7 +1922,7 @@ define void @mul_2xi16_varconst4(ptr nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movl c, %edx
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [32768,32768,32768,32768]
; X86-SSE-NEXT: psllq $32, %xmm0
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
@@ -1934,7 +1934,7 @@ define void @mul_2xi16_varconst4(ptr nocapture readonly %a, i64 %index) {
; X86-AVX-NEXT: movl c, %edx
; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [0,32768,u,u]
; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
@@ -1943,7 +1943,7 @@ define void @mul_2xi16_varconst4(ptr nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: movq c(%rip), %rax
; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: psrad $16, %xmm0
-; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,32768,32768,32768]
; X64-SSE-NEXT: psllq $32, %xmm0
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
@@ -1953,7 +1953,7 @@ define void @mul_2xi16_varconst4(ptr nocapture readonly %a, i64 %index) {
; X64-AVX-NEXT: movq c(%rip), %rax
; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0
-; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,32768,u,u]
; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4)
; X64-AVX-NEXT: retq
entry:
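Throughout these annotations, `u` marks constant-pool lanes whose value cannot affect the result: either the IR vector is narrower than the register (the 2 x i16 cases above only keep the low two dword lanes live, hence [0,65536,u,u]), or the instruction never reads those lanes. pmuludq is the common source of the latter, since it reads only the even dword lanes of each operand. A small C model of its lane usage, for illustration only:

#include <stdint.h>

/* pmuludq xmm, xmm: two 32x32->64 multiplies taking dword lanes 0 and 2
   of each source; lanes 1 and 3 are ignored, hence the `u` entries in
   the decoded constants. */
void pmuludq_model(uint64_t dst[2], const uint32_t a[4], const uint32_t b[4]) {
    dst[0] = (uint64_t)a[0] * b[0];
    dst[1] = (uint64_t)a[2] * b[2];
}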
diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll
index 975ffd0..e8c05f9 100644
--- a/llvm/test/CodeGen/X86/slow-pmulld.ll
+++ b/llvm/test/CodeGen/X86/slow-pmulld.ll
@@ -336,13 +336,13 @@ define <4 x i32> @test_mul_v4i32_v4i16(<4 x i16> %A) {
; SSE4-32-LABEL: test_mul_v4i32_v4i16:
; SSE4-32: # %bb.0:
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE4-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; SSE4-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [18778,18778,18778,18778]
; SSE4-32-NEXT: retl
;
; SSE4-64-LABEL: test_mul_v4i32_v4i16:
; SSE4-64: # %bb.0:
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE4-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE4-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [18778,18778,18778,18778]
; SSE4-64-NEXT: retq
;
; AVX2-SLOW-LABEL: test_mul_v4i32_v4i16:
@@ -838,13 +838,13 @@ define <4 x i32> @test_mul_v4i32_v4i16_minsize(<4 x i16> %A) minsize {
; SSE-32-LABEL: test_mul_v4i32_v4i16_minsize:
; SSE-32: # %bb.0:
; SSE-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; SSE-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [18778,18778,18778,18778]
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: test_mul_v4i32_v4i16_minsize:
; SSE-64: # %bb.0:
; SSE-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [18778,18778,18778,18778]
; SSE-64-NEXT: retq
;
; AVX2-LABEL: test_mul_v4i32_v4i16_minsize:
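The srem-seteq annotations that follow decode divisibility-check constants in the Granlund-Montgomery style: for an odd divisor d, n % d == 0 exactly when n * inv(d) (mod 2^32) lands in [0, (2^32-1)/d], which is what the pmulld / pminud / pcmpeqd sequences test. The signed variants first add a bias (the paddd) to shift the signed range onto the unsigned one. A hedged sketch of the unsigned core, using inv(5) = 3435973837 as seen in these tests:

#include <stdint.h>

/* n % 5 == 0 iff n * inv(5) wraps into [0, (2^32-1)/5]: multiples of 5
   map bijectively onto 0..858993459, everything else lands above. */
int divisible_by_5(uint32_t n) {
    return n * 3435973837u <= 0xFFFFFFFFu / 5;
}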
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
index 2d07788..bb7245c 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
@@ -10,15 +10,15 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -30,10 +30,10 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -47,10 +47,10 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -63,7 +63,7 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -75,7 +75,7 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -109,7 +109,7 @@ define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_eq:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -119,7 +119,7 @@ define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_eq:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -168,7 +168,7 @@ define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -178,7 +178,7 @@ define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_ne:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -234,7 +234,7 @@ define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_allones_eq:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
@@ -248,7 +248,7 @@ define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_allones_eq:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
@@ -308,7 +308,7 @@ define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_allones_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
@@ -322,7 +322,7 @@ define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_allones_ne:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
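For even divisors the same check needs one extra step, which is what the psrld $1 / pslld $31 / por pairs above implement: after multiplying by the inverse of the odd factor, rotate right by the divisor's power of two so the even-ness test folds into the same unsigned comparison. A sketch of the underlying unsigned check for d = 14 = 2 * 7, with inv(7) = 3067833783 as in these tests (the signed srem lowering additionally applies the bias first):

#include <stdint.h>

/* n % 14 == 0 iff rotr(n * inv(7), 1) <= (2^32-1)/14. */
int divisible_by_14(uint32_t n) {
    uint32_t q = n * 3067833783u;
    q = (q >> 1) | (q << 31);   /* rotate right by 1 */
    return q <= 0xFFFFFFFFu / 14;
}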
@@ -367,15 +367,15 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -387,10 +387,10 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_allones_eq:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -404,10 +404,10 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_allones_eq:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -420,7 +420,7 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_allones_eq:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -432,7 +432,7 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_eq:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -448,15 +448,15 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_allones_ne:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -468,10 +468,10 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_allones_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -485,10 +485,10 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_allones_ne:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -501,7 +501,7 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_allones_ne:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -514,7 +514,7 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_ne:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -534,14 +534,14 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,u,1,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -553,7 +553,7 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
@@ -568,9 +568,9 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -581,7 +581,7 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -593,7 +593,7 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -611,9 +611,9 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,u,1,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -636,11 +636,11 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -654,11 +654,11 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -671,7 +671,7 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_even_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -683,7 +683,7 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -701,9 +701,9 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -711,7 +711,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [2147483648,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
@@ -725,11 +725,11 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -743,11 +743,11 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -760,7 +760,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -772,7 +772,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -806,7 +806,7 @@ define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -816,7 +816,7 @@ define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -871,7 +871,7 @@ define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
@@ -885,7 +885,7 @@ define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
@@ -929,15 +929,15 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -949,10 +949,10 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -966,10 +966,10 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -982,7 +982,7 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -994,7 +994,7 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1018,9 +1018,9 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pand %xmm0, %xmm2
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,u,1,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
@@ -1039,7 +1039,7 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-SSE41-NEXT: pand %xmm0, %xmm2
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm2
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,1,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -1053,7 +1053,7 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
@@ -1067,7 +1067,7 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
@@ -1080,7 +1080,7 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
@@ -1102,7 +1102,7 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1137,8 +1137,8 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [2147483648,2147483648,2,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
@@ -1156,11 +1156,11 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: test_srem_even_INT_MIN:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [2147483648,2147483648,2,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
@@ -1177,7 +1177,7 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX2-LABEL: test_srem_even_INT_MIN:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
@@ -1196,7 +1196,7 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
@@ -1219,7 +1219,7 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [3067833783,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1227,7 +1227,7 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
@@ -1253,8 +1253,8 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [2147483648,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1,2147483648,2,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
@@ -1272,11 +1272,11 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: test_srem_odd_even_INT_MIN:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [2147483648,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1,2147483648,2,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2]
@@ -1293,7 +1293,7 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX2-LABEL: test_srem_odd_even_INT_MIN:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
@@ -1312,7 +1312,7 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
@@ -1333,14 +1333,14 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,1,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,u,3435973837,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -1352,7 +1352,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,1,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
@@ -1367,9 +1367,9 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1380,7 +1380,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3435973837]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1392,7 +1392,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3435973837]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1410,9 +1410,9 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,0,1,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,u,3067833783,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1420,7 +1420,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [1,u,2147483648,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
@@ -1434,11 +1434,11 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,0,1,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,268435456,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1452,11 +1452,11 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,2147483648,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,1,268435456,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1469,7 +1469,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,3067833783]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1481,7 +1481,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,3067833783]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1499,9 +1499,9 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,1,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1509,7 +1509,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [1,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
@@ -1523,11 +1523,11 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,1,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,268435456,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1541,11 +1541,11 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,268435456,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1558,7 +1558,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1570,7 +1570,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1604,7 +1604,7 @@ define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,4294967295,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -1614,7 +1614,7 @@ define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1669,7 +1669,7 @@ define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_allones_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
@@ -1683,7 +1683,7 @@ define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_allones_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
@@ -1727,15 +1727,15 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1747,10 +1747,10 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,0,0,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,1,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1764,10 +1764,10 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,0,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,1,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1780,7 +1780,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,0,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1792,7 +1792,7 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,0,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1812,15 +1812,15 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3435973837,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,268435456,1,1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1832,10 +1832,10 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3435973837]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1,1]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1849,10 +1849,10 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,268435456,1,1]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1865,7 +1865,7 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1877,7 +1877,7 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1895,9 +1895,9 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,1,0,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3067833783,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1905,7 +1905,7 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [268435456,u,2147483648,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
@@ -1919,11 +1919,11 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,1,0,3067833783]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,u,2147483648,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,268435456,1,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1937,11 +1937,11 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,u,2147483648,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,268435456,1,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1954,7 +1954,7 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1966,7 +1966,7 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -1984,15 +1984,15 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,268435456,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -2004,10 +2004,10 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -2021,10 +2021,10 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,268435456,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -2037,7 +2037,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -2049,7 +2049,7 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -2067,9 +2067,9 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,u,1,u]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT: psrlq $32, %xmm1
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -2081,9 +2081,9 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,u,1,u]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrlq $32, %xmm0
@@ -2096,9 +2096,9 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,u,1,u]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
@@ -2110,7 +2110,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,0]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -2122,7 +2122,7 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,0,1,0]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -2138,9 +2138,9 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,u,1,u]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,u,268435456,u]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT: psrlq $32, %xmm1
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -2152,9 +2152,9 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,u,1,u]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,u,268435456,u]
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrlq $32, %xmm0
@@ -2167,9 +2167,9 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,u,1,u]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,u,268435456,u]
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
@@ -2181,7 +2181,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,0]
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -2193,7 +2193,7 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,0,1,0]
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
@@ -2335,10 +2335,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [34048,34048,26368,37632,21760,33024,22016,35072]
+; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,133,0,133,0,103,0,147,0,85,0,129,0,86,0,137]
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [20224,26368,6912,30976,33024,33024,33024,12032]
+; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,79,0,103,0,27,0,121,0,129,0,129,0,129,0,47]
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; CHECK-AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm4
; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm5 # [0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0]
@@ -2369,10 +2369,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm4
; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [2304,0,10496,37632,33024,33024,21760,36096]
+; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [0,9,0,0,0,41,0,147,0,129,0,129,0,85,0,141]
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm7 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [22016,24320,37632,11008,12544,32512,16640,37632]
+; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [0,86,0,95,0,147,0,43,0,49,0,127,0,65,0,147]
; CHECK-AVX1-NEXT: vpsrlw $8, %xmm7, %xmm7
; CHECK-AVX1-NEXT: vpackuswb %xmm6, %xmm7, %xmm6
; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm7 # [0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0]
@@ -2417,10 +2417,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [34048,34048,26368,37632,21760,33024,22016,35072,2304,0,10496,37632,33024,33024,21760,36096]
+; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,133,0,133,0,103,0,147,0,85,0,129,0,86,0,137,0,9,0,0,0,41,0,147,0,129,0,129,0,85,0,141]
; CHECK-AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3
; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
-; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [20224,26368,6912,30976,33024,33024,33024,12032,22016,24320,37632,11008,12544,32512,16640,37632]
+; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,79,0,103,0,27,0,121,0,129,0,129,0,129,0,47,0,86,0,95,0,147,0,43,0,49,0,127,0,65,0,147]
; CHECK-AVX2-NEXT: vpsrlw $8, %ymm4, %ymm4
; CHECK-AVX2-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; CHECK-AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm4 # [0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0]
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
index 3359202..d459d01 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
@@ -24,7 +24,7 @@ define <4 x i32> @test_srem_odd_25(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_25:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -34,7 +34,7 @@ define <4 x i32> @test_srem_odd_25(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_25:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -90,7 +90,7 @@ define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_100:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $2, %xmm1
@@ -104,7 +104,7 @@ define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_100:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
@@ -165,7 +165,7 @@ define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_odd_neg25:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
@@ -175,7 +175,7 @@ define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_odd_neg25:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -231,7 +231,7 @@ define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_srem_even_neg100:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $2, %xmm1
@@ -245,7 +245,7 @@ define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_srem_even_neg100:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
@@ -333,7 +333,7 @@ define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: psrld $31, %xmm1
; CHECK-SSE41-NEXT: psrad $3, %xmm2
; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [25,25,25,25]
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -351,7 +351,7 @@ define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25,25,25,25]
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -444,7 +444,7 @@ define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: psrld $31, %xmm1
; CHECK-SSE41-NEXT: psrad $5, %xmm2
; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [100,100,100,100]
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -462,7 +462,7 @@ define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [100,100,100,100]
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/udiv-exact.ll b/llvm/test/CodeGen/X86/udiv-exact.ll
index 271d11e..2b3f26a 100644
--- a/llvm/test/CodeGen/X86/udiv-exact.ll
+++ b/llvm/test/CodeGen/X86/udiv-exact.ll
@@ -87,7 +87,7 @@ define <4 x i32> @test5(<4 x i32> %x) {
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [2863311531,u,3264175145,u]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
@@ -95,7 +95,7 @@ define <4 x i32> @test5(<4 x i32> %x) {
; X64-LABEL: test5:
; X64: # %bb.0:
; X64-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,3264175145,3264175145]
; X64-NEXT: retq
%div = udiv exact <4 x i32> %x, <i32 24, i32 24, i32 25, i32 25>
ret <4 x i32> %div
@@ -112,7 +112,7 @@ define <4 x i32> @test6(<4 x i32> %x) {
; X86-NEXT: pmuludq %xmm0, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [2863311531,u,3303820997,u]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movdqa %xmm1, %xmm0
@@ -121,7 +121,7 @@ define <4 x i32> @test6(<4 x i32> %x) {
; X64-LABEL: test6:
; X64: # %bb.0:
; X64-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,3303820997,3303820997]
; X64-NEXT: retq
%div = udiv exact <4 x i32> %x, <i32 24, i32 24, i32 26, i32 26>
ret <4 x i32> %div
@@ -131,16 +131,16 @@ define <4 x i32> @test7(<4 x i32> %x) {
; X86-LABEL: test7:
; X86: # %bb.0:
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [3264175145,3264175145,1749801491,1749801491]
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [3264175145,u,1749801491,u]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
;
; X64-LABEL: test7:
; X64: # %bb.0:
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,1749801491,1749801491]
; X64-NEXT: retq
%div = udiv exact <4 x i32> %x, <i32 25, i32 25, i32 27, i32 27>
ret <4 x i32> %div
@@ -156,7 +156,7 @@ define <4 x i32> @test8(<4 x i32> %x) {
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1,u,2863311531,u]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
@@ -164,7 +164,7 @@ define <4 x i32> @test8(<4 x i32> %x) {
; X64-LABEL: test8:
; X64: # %bb.0:
; X64-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,2863311531,2863311531]
; X64-NEXT: retq
%div = udiv exact <4 x i32> %x, <i32 1, i32 1, i32 24, i32 24>
ret <4 x i32> %div
diff --git a/llvm/test/CodeGen/X86/undo-mul-and.ll b/llvm/test/CodeGen/X86/undo-mul-and.ll
index c9c40099..6566153 100644
--- a/llvm/test/CodeGen/X86/undo-mul-and.ll
+++ b/llvm/test/CodeGen/X86/undo-mul-and.ll
@@ -63,9 +63,9 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_fail_no_splat(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [56,56,56,64]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [56,u,64,u]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -73,13 +73,13 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_fail_no_splat(<4 x i32> %x) {
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,64]
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,64]
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 64>
@@ -92,9 +92,9 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat1(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [56,56,56,48]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [56,u,48,u]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -102,13 +102,13 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat1(<4 x i32> %x) {
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,48]
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,48]
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 48>
@@ -131,7 +131,7 @@ define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat2(<4 x i32> %x) {
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [56,56,56,56]
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
index 874d885..759055d 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -167,7 +167,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; SSE41-NEXT: pinsrd $1, %esi, %xmm0
; SSE41-NEXT: pinsrd $2, %edx, %xmm0
; SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [683,1463,819,u]
; SSE41-NEXT: pmovsxwd {{.*#+}} xmm1 = [2047,2047,2047,2047]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
@@ -193,7 +193,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [683,1463,819,u]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [2047,2047,2047,2047]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
@@ -218,7 +218,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; AVX2-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; AVX2-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; AVX2-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [683,1463,819,u]
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2047,2047,2047,2047]
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
@@ -240,7 +240,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; AVX512VL-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; AVX512VL-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [683,1463,819,u]
; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2047,2047,2047,2047]
; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
index 838086e..2228c09 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
@@ -10,10 +10,10 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -26,9 +26,9 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -42,9 +42,9 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -57,7 +57,7 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -68,7 +68,7 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,3264175145,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -87,9 +87,9 @@ define <4 x i32> @test_urem_odd_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,4294967295,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -99,7 +99,7 @@ define <4 x i32> @test_urem_odd_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_eq:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,4294967295,3435973837]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,1,858993459]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -108,7 +108,7 @@ define <4 x i32> @test_urem_odd_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX-LABEL: test_urem_odd_allones_eq:
; CHECK-AVX: # %bb.0:
-; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,4294967295,3435973837]
; CHECK-AVX-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -122,9 +122,9 @@ define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_allones_ne:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,4294967295,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -134,7 +134,7 @@ define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,4294967295,3435973837]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993460,858993460,2,858993460]
; CHECK-SSE41-NEXT: pmaxud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -143,7 +143,7 @@ define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX-LABEL: test_urem_odd_allones_ne:
; CHECK-AVX: # %bb.0:
-; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,4294967295,3435973837]
; CHECK-AVX-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -159,12 +159,12 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,4294967295,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
@@ -178,10 +178,10 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_allones_eq:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -195,10 +195,10 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_allones_eq:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -211,7 +211,7 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_allones_eq:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -222,7 +222,7 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_eq:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -237,12 +237,12 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_ne:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,4294967295,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
@@ -256,10 +256,10 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_allones_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -273,10 +273,10 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_allones_ne:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,1,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -289,7 +289,7 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_allones_ne:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -300,7 +300,7 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_ne:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,4294967295,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -317,10 +317,10 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -333,9 +333,9 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_eq:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -349,9 +349,9 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -364,7 +364,7 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -375,7 +375,7 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -390,10 +390,10 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_ne:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -406,9 +406,9 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -422,9 +422,9 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -437,7 +437,7 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -448,7 +448,7 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,4294967295,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -467,10 +467,10 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -482,7 +482,7 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -496,8 +496,8 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -508,7 +508,7 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -519,7 +519,7 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -536,12 +536,12 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
@@ -555,10 +555,10 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -572,10 +572,10 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,268435456,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -588,7 +588,7 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -599,7 +599,7 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -616,11 +616,11 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -634,10 +634,10 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -651,10 +651,10 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2147483648,268435456,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -667,7 +667,7 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -678,7 +678,7 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -710,7 +710,7 @@ define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,4294967295,858993459]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -719,7 +719,7 @@ define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -769,7 +769,7 @@ define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
; CHECK-SSE41-NEXT: pslld $31, %xmm0
@@ -782,7 +782,7 @@ define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -822,10 +822,10 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -838,9 +838,9 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -854,9 +854,9 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -869,7 +869,7 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -880,7 +880,7 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,0,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -899,10 +899,10 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_INT_MIN:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,2,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -914,7 +914,7 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_INT_MIN:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [1,2]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -928,8 +928,8 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_INT_MIN:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,2,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -940,7 +940,7 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_INT_MIN:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -951,7 +951,7 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_INT_MIN:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,1,3435973837]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -968,12 +968,12 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_INT_MIN:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,2,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
@@ -987,10 +987,10 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_INT_MIN:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,2147483648,2,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1004,10 +1004,10 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_INT_MIN:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,2147483648,2,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1020,7 +1020,7 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_INT_MIN:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1031,7 +1031,7 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_INT_MIN:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,3067833783,1,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1048,11 +1048,11 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_INT_MIN:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,2,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3067833783,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1066,10 +1066,10 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_INT_MIN:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2147483648,2,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1083,10 +1083,10 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_INT_MIN:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2147483648,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2147483648,2,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1099,7 +1099,7 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_INT_MIN:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1110,7 +1110,7 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_INT_MIN:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3067833783,1,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1129,10 +1129,10 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3435973837,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,3435973837]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -1144,7 +1144,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,3435973837]
; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -1158,8 +1158,8 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3435973837]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1170,7 +1170,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3435973837]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1181,7 +1181,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3435973837]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1198,11 +1198,11 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,1,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,268435456,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3067833783,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1216,10 +1216,10 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,1,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,268435456,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1233,10 +1233,10 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,2147483648,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,1,268435456,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1249,7 +1249,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1260,7 +1260,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1277,11 +1277,11 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_poweroftwo:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,3264175145]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,268435456,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,1073741824,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1295,10 +1295,10 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_poweroftwo:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,1073741824,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,268435456,1073741824]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1312,10 +1312,10 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_poweroftwo:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,1073741824,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,268435456,1073741824]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1328,7 +1328,7 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_poweroftwo:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1339,7 +1339,7 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_poweroftwo:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1358,9 +1358,9 @@ define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_allones_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,0,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3435973837,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1370,7 +1370,7 @@ define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,0,3435973837]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,1,4294967295,858993459]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -1379,7 +1379,7 @@ define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX-LABEL: test_urem_odd_allones_and_one:
; CHECK-AVX: # %bb.0:
-; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,0,3435973837]
; CHECK-AVX-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -1395,11 +1395,11 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,0,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,1,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3067833783,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1413,10 +1413,10 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_allones_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,0,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,2147483648,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,1,1,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1430,10 +1430,10 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_allones_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,0,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,u,2147483648,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,1,1,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1446,7 +1446,7 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_allones_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,0,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1457,7 +1457,7 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,0,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1474,10 +1474,10 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,1,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1490,9 +1490,9 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,0,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,1,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1506,9 +1506,9 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,0,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,1,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1521,7 +1521,7 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,0,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1532,7 +1532,7 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,0,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1551,10 +1551,10 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3435973837,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1,1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1567,9 +1567,9 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3435973837]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1,1]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1583,9 +1583,9 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,268435456,1,1]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1598,7 +1598,7 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1609,7 +1609,7 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3435973837]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1626,11 +1626,11 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,1,0,3067833783]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,268435456,1,2147483648]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3067833783,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,u,2147483648,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1644,10 +1644,10 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,1,0,3067833783]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,u,2147483648,u]
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,268435456,1,2147483648]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1661,10 +1661,10 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,u,2147483648,u]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2147483648,268435456,1,2147483648]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1677,7 +1677,7 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1688,7 +1688,7 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,1,0,3067833783]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1705,10 +1705,10 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,u,3264175145,u]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1073741824,1073741824]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1721,9 +1721,9 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,1,0,3264175145]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [268435456,268435456,1073741824,1073741824]
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1737,9 +1737,9 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [268435456,268435456,1073741824,1073741824]
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1752,7 +1752,7 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1763,7 +1763,7 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,1,0,3264175145]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1781,10 +1781,10 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,0,0,0]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,0]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,268435456,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -1796,7 +1796,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,4294967295,1,0]
; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -1810,8 +1810,8 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,0]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,u,268435456,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1822,7 +1822,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,0]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1833,7 +1833,7 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
;
; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,4294967295,1,0]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -1849,10 +1849,10 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4294967295,0,0,0]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,1,0]
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2147483648,u,268435456,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrlq $32, %xmm0
@@ -1864,7 +1864,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3067833783,4294967295,1,0]
; CHECK-SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = [2147483648,268435456]
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -1878,8 +1878,8 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,0]
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [2147483648,u,268435456,u]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1890,7 +1890,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,0]
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1901,7 +1901,7 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3067833783,4294967295,1,0]
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
index 6a36cd2..8042103 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
@@ -25,7 +25,7 @@ define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
; CHECK-SSE41-LABEL: t32_3:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655765,1431655764,1431655764,1431655764]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -34,7 +34,7 @@ define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: t32_3:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
@@ -80,7 +80,7 @@ define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
; CHECK-SSE41-LABEL: t32_5:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,858993458,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -89,7 +89,7 @@ define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: t32_5:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
@@ -140,7 +140,7 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
; CHECK-SSE41-LABEL: t32_6_part0:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
; CHECK-SSE41-NEXT: pslld $31, %xmm0
@@ -153,7 +153,7 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: t32_6_part0:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -211,7 +211,7 @@ define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
; CHECK-SSE41-LABEL: t32_6_part1:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
; CHECK-SSE41-NEXT: pslld $31, %xmm0
@@ -224,7 +224,7 @@ define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: t32_6_part1:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -275,7 +275,7 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
; CHECK-SSE41-LABEL: t32_tautological:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295,4294967295,1431655764]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -286,7 +286,7 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
; CHECK-AVX1-LABEL: t32_tautological:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll
index 2166e43..b490c3c 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll
@@ -23,7 +23,7 @@ define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_25:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -32,7 +32,7 @@ define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_odd_25:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -83,7 +83,7 @@ define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_100:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $2, %xmm1
; CHECK-SSE41-NEXT: pslld $30, %xmm0
@@ -96,7 +96,7 @@ define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_100:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -139,9 +139,9 @@ define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_neg25:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,1030792151,1030792151,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1030792151,u,3264175145,u]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -151,7 +151,7 @@ define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_odd_neg25:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3264175145,1030792151,1030792151,3264175145]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798691,1,1,171798691]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -160,7 +160,7 @@ define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
;
; CHECK-AVX-LABEL: test_urem_odd_neg25:
; CHECK-AVX: # %bb.0:
-; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [3264175145,1030792151,1030792151,3264175145]
; CHECK-AVX-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -176,9 +176,9 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_neg100:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4252017623,3264175145,4252017623,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -192,7 +192,7 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: test_urem_even_neg100:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4252017623,3264175145,4252017623,3264175145]
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $2, %xmm1
; CHECK-SSE41-NEXT: pslld $30, %xmm0
@@ -205,7 +205,7 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: test_urem_even_neg100:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4252017623,3264175145,4252017623,3264175145]
; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -216,7 +216,7 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
;
; CHECK-AVX2-LABEL: test_urem_even_neg100:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4252017623,3264175145,4252017623,3264175145]
; CHECK-AVX2-NEXT: vpsrld $2, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpslld $30, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -227,7 +227,7 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
;
; CHECK-AVX512VL-LABEL: test_urem_even_neg100:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4252017623,3264175145,4252017623,3264175145]
; CHECK-AVX512VL-NEXT: vprord $2, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -277,7 +277,7 @@ define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrld $3, %xmm2
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [25,25,25,25]
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -293,7 +293,7 @@ define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrld $3, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25,25,25,25]
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -372,7 +372,7 @@ define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrld $5, %xmm2
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [100,100,100,100]
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -388,7 +388,7 @@ define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrld $5, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [100,100,100,100]
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
index 84856aa..e5b19a5 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -25,7 +25,7 @@ define <4 x i1> @t0_all_tautological(<4 x i32> %X) nounwind {
define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: t1_all_odd_eq:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -33,7 +33,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: t1_all_odd_eq:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655765,4294967295,4294967295,4294967295]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -43,7 +43,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: t1_all_odd_eq:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
@@ -76,7 +76,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: t1_all_odd_ne:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -84,7 +84,7 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
;
; CHECK-SSE41-LABEL: t1_all_odd_ne:
; CHECK-SSE41: # %bb.0:
-; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655765,4294967295,4294967295,4294967295]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
@@ -95,7 +95,7 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
;
; CHECK-AVX1-LABEL: t1_all_odd_ne:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311531,2863311531,2863311531,2863311531]
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -187,7 +187,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT: psrlq $32, %xmm3
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
-; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311530,2863311530]
; CHECK-SSE2-NEXT: paddq %xmm3, %xmm0
; CHECK-SSE2-NEXT: psllq $32, %xmm0
; CHECK-SSE2-NEXT: paddq %xmm2, %xmm0
@@ -212,7 +212,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3
; CHECK-SSE41-NEXT: psrlq $32, %xmm3
; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3
-; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2863311530,2863311530]
; CHECK-SSE41-NEXT: paddq %xmm3, %xmm0
; CHECK-SSE41-NEXT: psllq $32, %xmm0
; CHECK-SSE41-NEXT: paddq %xmm2, %xmm0
@@ -236,7 +236,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3
; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311530,2863311530]
; CHECK-AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -255,7 +255,7 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; CHECK-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2863311530,2863311530]
; CHECK-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll
index 83a0ddb..fce8795 100644
--- a/llvm/test/CodeGen/X86/var-permute-128.ll
+++ b/llvm/test/CodeGen/X86/var-permute-128.ll
@@ -241,7 +241,7 @@ define <4 x i32> @var_shuffle_v4i32(<4 x i32> %v, <4 x i32> %indices) nounwind {
;
; SSE41-LABEL: var_shuffle_v4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [67372036,67372036,67372036,67372036]
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
; SSE41-NEXT: retq
@@ -319,7 +319,7 @@ define <4 x i32> @var_shuffle_zero_v4i32(<4 x i32> %v, <4 x i32> %indices) nounw
; SSE41-NEXT: pmaxud %xmm1, %xmm2
; SSE41-NEXT: pcmpeqd %xmm1, %xmm2
; SSE41-NEXT: por %xmm2, %xmm1
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [67372036,67372036,67372036,67372036]
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: por %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
@@ -1261,7 +1261,7 @@ define <4 x float> @var_shuffle_v4f32(<4 x float> %v, <4 x i32> %indices) nounwi
;
; SSE41-LABEL: var_shuffle_v4f32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [67372036,67372036,67372036,67372036]
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
; SSE41-NEXT: retq
@@ -1339,7 +1339,7 @@ define <4 x float> @var_shuffle_zero_v4f32(<4 x float> %v, <4 x i32> %indices) n
; SSE41-NEXT: pmaxud %xmm1, %xmm2
; SSE41-NEXT: pcmpeqd %xmm1, %xmm2
; SSE41-NEXT: por %xmm2, %xmm1
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [67372036,67372036,67372036,67372036]
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: por %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vec_reassociate.ll b/llvm/test/CodeGen/X86/vec_reassociate.ll
index a9473fff..4703ca3 100644
--- a/llvm/test/CodeGen/X86/vec_reassociate.ll
+++ b/llvm/test/CodeGen/X86/vec_reassociate.ll
@@ -38,13 +38,13 @@ define <4 x i32> @mul_4i32(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: mul_4i32:
; X86: # %bb.0:
; X86-NEXT: pmulld %xmm1, %xmm0
-; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,6,6,4]
; X86-NEXT: retl
;
; X64-LABEL: mul_4i32:
; X64: # %bb.0:
; X64-NEXT: pmulld %xmm1, %xmm0
-; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,6,6,4]
; X64-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4>
%2 = mul <4 x i32> %a1, <i32 4, i32 3, i32 2, i32 1>
@@ -56,13 +56,13 @@ define <4 x i32> @mul_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: mul_4i32_commute:
; X86: # %bb.0:
; X86-NEXT: pmulld %xmm1, %xmm0
-; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,6,6,4]
; X86-NEXT: retl
;
; X64-LABEL: mul_4i32_commute:
; X64: # %bb.0:
; X64-NEXT: pmulld %xmm1, %xmm0
-; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,6,6,4]
; X64-NEXT: retq
%1 = mul <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %a0
%2 = mul <4 x i32> <i32 4, i32 3, i32 2, i32 1>, %a1
diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll
index ac932d5..1a63515 100644
--- a/llvm/test/CodeGen/X86/vector-compress.ll
+++ b/llvm/test/CodeGen/X86/vector-compress.ll
@@ -1090,7 +1090,6 @@ define <16 x i8> @test_compress_v16i8(<16 x i8> %vec, <16 x i1> %mask, <16 x i8>
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vmovaps %xmm2, -{{[0-9]+}}(%rsp)
@@ -1335,7 +1334,6 @@ define <32 x i8> @test_compress_v32i8(<32 x i8> %vec, <32 x i1> %mask, <32 x i8>
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $64, %rsp
; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm3, %ymm3
; AVX2-NEXT: vmovaps %ymm2, (%rsp)
@@ -4733,7 +4731,6 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpextrb $0, %xmm0, -{{[0-9]+}}(%rsp)
@@ -4751,72 +4748,7 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) nounwind {
; AVX2-NEXT: vpextrb $3, %xmm1, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: vpextrb $4, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $4, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: vpextrb $5, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $5, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $6, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $6, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $7, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $7, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $8, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $8, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $9, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $9, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $10, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $10, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $11, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $11, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $12, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addq %rcx, %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $12, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: vpextrb $13, %xmm1, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: addq %rax, %rcx
-; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $13, %xmm0, -24(%rsp,%rax)
-; AVX2-NEXT: vpextrb $14, %xmm1, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: addl %ecx, %eax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpextrb $14, %xmm0, -24(%rsp,%rcx)
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpextrb $15, %xmm0, -24(%rsp,%rax)
+; AVX2-NEXT: vpextrb $15, %xmm0, -24(%rsp,%rcx)
; AVX2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index 762900e..a0c2760 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -1821,9 +1821,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm3[0,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [32,u,128,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: por %xmm1, %xmm0
@@ -1841,7 +1841,7 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE41-NEXT: psrld $28, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: retq
;
@@ -1854,7 +1854,7 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; AVX1-NEXT: vpsrld $28, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -1935,9 +1935,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm3[0,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,64,128]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [32,u,128,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE2-NEXT: por %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 445e572..2fadf5f 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -1647,7 +1647,7 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [256,512,1024,2048]
; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrld $25, %xmm1, %xmm3
; AVX1-NEXT: vpsrld $27, %xmm1, %xmm4
@@ -1656,7 +1656,7 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX1-NEXT: vpsrld $28, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index d0690bd..ec2efcd 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -1302,9 +1302,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1316,8 +1316,8 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
; SSE41-LABEL: constant_funnnel_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u]
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1328,8 +1328,8 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
; AVX1-LABEL: constant_funnnel_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,128,u]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1394,9 +1394,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
; X86-SSE2-LABEL: constant_funnnel_v4i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,64,128]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [32,u,128,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index 421fa98..5f7e407 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -1082,13 +1082,13 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x) nounwind {
; AVX1-LABEL: constant_funnnel_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,128,u]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [512,u,2048,u]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [256,512,1024,2048]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovshdup {{.*#+}} ymm2 = ymm0[1,1,3,3,5,5,7,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7]
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
index b378dce..304daab 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
@@ -319,9 +319,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,1,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,1,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -333,8 +333,8 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE41-LABEL: constant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,1,u]
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,1,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -345,8 +345,8 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; AVX1-LABEL: constant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,1,u]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,1,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -411,9 +411,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; X86-SSE2-LABEL: constant_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,1,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [32,u,1,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
diff --git a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
index 06ff7e7..ae5dd18 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
@@ -500,9 +500,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; SSE2-NEXT: psrld $27, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,1,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [32,u,1,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: por %xmm1, %xmm0
@@ -514,7 +514,7 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; SSE41-NEXT: psrld $27, %xmm2
; SSE41-NEXT: psrld $28, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7]
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,1,1]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: retq
;
@@ -523,7 +523,7 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; AVX1-NEXT: vpsrld $27, %xmm1, %xmm2
; AVX1-NEXT: vpsrld $28, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,1,1]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -598,9 +598,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; X86-SSE2-NEXT: psrld $27, %xmm2
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,1,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [32,u,1,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE2-NEXT: por %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index d16b28a..33a6a76 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -1741,9 +1741,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm3[0,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,67108864,33554432]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [134217728,u,33554432,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: por %xmm1, %xmm0
@@ -1761,7 +1761,7 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; SSE41-NEXT: psrld $4, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,67108864,33554432]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: retq
;
@@ -1774,7 +1774,7 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; AVX1-NEXT: vpsrld $4, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,67108864,33554432]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -1856,9 +1856,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm3[0,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [268435456,134217728,67108864,33554432]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [134217728,u,33554432,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE2-NEXT: por %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index a387562..217431be 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -1403,7 +1403,7 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [16777216,8388608,4194304,2097152]
; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrld $7, %xmm1, %xmm3
; AVX1-NEXT: vpsrld $5, %xmm1, %xmm4
@@ -1412,7 +1412,7 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX1-NEXT: vpsrld $4, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm4[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,67108864,33554432]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index 4969cb5..5d01dfd 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -1380,9 +1380,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,67108864,33554432]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [134217728,u,33554432,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1394,8 +1394,8 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
; SSE41-LABEL: constant_funnnel_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [134217728,u,33554432,u]
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,67108864,33554432]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1406,8 +1406,8 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
; AVX1-LABEL: constant_funnnel_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [134217728,u,33554432,u]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,67108864,33554432]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1472,9 +1472,9 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
; X86-SSE2-LABEL: constant_funnnel_v4i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [268435456,134217728,67108864,33554432]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [134217728,u,33554432,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index e2a3e26..4dc931d 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -1134,13 +1134,13 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x) nounwind {
; AVX1-LABEL: constant_funnnel_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [134217728,u,33554432,u]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [8388608,u,2097152,u]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,67108864,33554432]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [16777216,8388608,4194304,2097152]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovshdup {{.*#+}} ymm2 = ymm0[1,1,3,3,5,5,7,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7]
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
index ef5ffe4..4b42b18 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
@@ -341,9 +341,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,1,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [134217728,u,1,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -355,8 +355,8 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE41-LABEL: constant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [134217728,u,1,u]
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [268435456,134217728,1,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -367,8 +367,8 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; AVX1-LABEL: constant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [134217728,u,1,u]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,134217728,1,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -433,9 +433,9 @@ define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; X86-SSE2-LABEL: constant_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [268435456,134217728,1,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [134217728,u,1,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
index 816d5ca..e68d1d7 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -171,7 +171,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE-NEXT: movdqa {{.*#+}} xmm3 = [37632,37632,37632,37632,37632,37632,37632,37632]
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; SSE-NEXT: pmulhw %xmm3, %xmm2
; SSE-NEXT: psrlw $8, %xmm2
; SSE-NEXT: pxor %xmm4, %xmm4
@@ -193,7 +193,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX1-NEXT: vpmulhw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
@@ -260,11 +260,11 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632]
+; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147]
; SSE-NEXT: psrlw $8, %xmm2
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; SSE-NEXT: psrlw $8, %xmm3
; SSE-NEXT: packuswb %xmm2, %xmm3
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -287,10 +287,10 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -561,7 +561,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE-NEXT: movdqa {{.*#+}} xmm3 = [37632,37632,37632,37632,37632,37632,37632,37632]
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; SSE-NEXT: pmulhw %xmm3, %xmm2
; SSE-NEXT: psrlw $8, %xmm2
; SSE-NEXT: pxor %xmm4, %xmm4
@@ -588,7 +588,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX1-NEXT: vpmulhw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
@@ -667,11 +667,11 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [37632,20224,11008,47872,26368,14592,14592,37632]
+; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147]
; SSE2-NEXT: psrlw $8, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; SSE2-NEXT: psrlw $8, %xmm3
; SSE2-NEXT: packuswb %xmm1, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,0,0,255,0,0,255,255,0,0,255,0,0,0,255]
@@ -706,11 +706,11 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632]
+; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147]
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: packuswb %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,0,0,255,0,0,255,255,0,0,255,0,0,0,255]
@@ -741,10 +741,10 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,147,0,79,0,43,0,187,0,103,0,57,0,57,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
@@ -1116,11 +1116,11 @@ define <16 x i8> @PR143238(<16 x i8> %a0) {
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [26368,47872,11008,20224,37632,35072,33024,30976]
+; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,103,0,187,0,43,0,79,0,147,0,137,0,129,0,121]
; SSE-NEXT: psrlw $8, %xmm2
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [33024,22016,33024,26368,11008,37632,33024,14592]
+; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,129,0,86,0,129,0,103,0,43,0,147,0,129,0,57]
; SSE-NEXT: psrlw $8, %xmm3
; SSE-NEXT: packuswb %xmm2, %xmm3
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1144,10 +1144,10 @@ define <16 x i8> @PR143238(<16 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [26368,47872,11008,20224,37632,35072,33024,30976]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,103,0,187,0,43,0,79,0,147,0,137,0,129,0,121]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [33024,22016,33024,26368,11008,37632,33024,14592]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,129,0,86,0,129,0,103,0,43,0,147,0,129,0,57]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
index 63c69e5..7355f36 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
@@ -161,7 +161,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX1-NEXT: vpmulhw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
@@ -198,7 +198,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX2NOBW-NEXT: vpmulhw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
@@ -245,10 +245,10 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
@@ -266,10 +266,10 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm1, %xmm4
; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [35072,33024,30976,14592,6912,26368,12544,47872]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
@@ -291,10 +291,10 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [35072,33024,30976,14592,6912,26368,12544,47872,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37632,33024,14592,26368,47872,11008,20224,37632,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
@@ -539,7 +539,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX1-NEXT: vpmulhw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
@@ -585,7 +585,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX2NOBW-NEXT: vpmulhw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
@@ -640,10 +640,10 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm4
@@ -668,10 +668,10 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpor %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [35072,33024,30976,14592,6912,26368,12544,47872]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [37632,33024,14592,26368,47872,11008,20224,37632]
+; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpackuswb %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
@@ -699,10 +699,10 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [35072,33024,30976,14592,6912,26368,12544,47872,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37632,33024,14592,26368,47872,11008,20224,37632,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index 6bc4fcb..5445330 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -132,7 +132,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31]
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
@@ -169,7 +169,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX512BW-NEXT: vpmulhw %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
@@ -199,10 +199,10 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,137,0,71,0,147,0,19,0,79,0,41,0,43,0,179,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,27,0,111,0,57,0,59,0,121,0,125,0,129,0,133,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpackuswb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm3
@@ -220,10 +220,10 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm1, %ymm4
; AVX512F-NEXT: vpsubb %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,133,0,129,0,125,0,121,0,59,0,57,0,111,0,27]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,179,0,43,0,41,0,79,0,19,0,147,0,71,0,137]
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0
@@ -245,10 +245,10 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912,35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,133,0,129,0,125,0,121,0,59,0,57,0,111,0,27,0,137,0,71,0,147,0,19,0,79,0,41,0,43,0,179,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
-; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072,6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,179,0,43,0,41,0,79,0,19,0,147,0,71,0,137,0,27,0,111,0,57,0,59,0,121,0,125,0,129,0,133,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
@@ -444,7 +444,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
@@ -490,7 +490,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147,0,147]
; AVX512BW-NEXT: vpmulhw %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
@@ -524,10 +524,10 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,137,0,71,0,147,0,19,0,79,0,41,0,43,0,179,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,27,0,111,0,57,0,59,0,121,0,125,0,129,0,133,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm4
@@ -552,10 +552,10 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpor %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,133,0,129,0,125,0,121,0,59,0,57,0,111,0,27]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072]
+; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,179,0,43,0,41,0,79,0,19,0,147,0,71,0,137]
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vpaddb %ymm4, %ymm3, %ymm3
@@ -583,10 +583,10 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912,35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632]
+; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [0,137,0,129,0,121,0,57,0,27,0,103,0,49,0,187,0,133,0,129,0,125,0,121,0,59,0,57,0,111,0,27,0,137,0,71,0,147,0,19,0,79,0,41,0,43,0,179,0,147,0,79,0,43,0,187,0,103,0,57,0,129,0,147]
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
-; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072,6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072]
+; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [0,147,0,129,0,57,0,103,0,187,0,43,0,79,0,147,0,179,0,43,0,41,0,79,0,19,0,147,0,71,0,137,0,27,0,111,0,57,0,59,0,121,0,125,0,129,0,133,0,187,0,49,0,103,0,27,0,57,0,121,0,129,0,137]
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
index 33d80f6..6cd5098 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
@@ -169,7 +169,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [37,37,37,37,37,37,37,37]
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; SSE2-NEXT: pmullw %xmm3, %xmm2
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm4
@@ -209,7 +209,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [37,37,37,37,37,37,37,37]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@@ -270,22 +270,22 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,256,256,256,256,256,256,256]
; SSE2-NEXT: psrlw $8, %xmm2
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,79,171,117,205,57,57,37]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,256,256,256,256,256,256,128]
; SSE2-NEXT: psrlw $8, %xmm3
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,32,57,205,117,171,79,147]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; SSE2-NEXT: psrlw $8, %xmm3
; SSE2-NEXT: packuswb %xmm2, %xmm3
; SSE2-NEXT: psubb %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,128,0,0,0,128]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,0,0,0,128,0,0,0]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; SSE2-NEXT: psrlw $8, %xmm0
; SSE2-NEXT: packuswb %xmm2, %xmm0
; SSE2-NEXT: paddb %xmm3, %xmm0
@@ -309,7 +309,7 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: psllw $7, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; SSE41-NEXT: psrlw $8, %xmm3
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,79,171,117,205,57,57,37]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -317,15 +317,15 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: psllw $7, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7]
; SSE41-NEXT: psrlw $8, %xmm4
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,32,57,205,117,171,79,147]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; SSE41-NEXT: psrlw $8, %xmm4
; SSE41-NEXT: packuswb %xmm3, %xmm4
; SSE41-NEXT: psubb %xmm4, %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,0,0,128]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,128,0,0,0]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: packuswb %xmm0, %xmm2
; SSE41-NEXT: paddb %xmm4, %xmm2
@@ -346,22 +346,22 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,79,171,117,205,57,57,37]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37,32,57,205,117,171,79,147]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,0,0,128,0,0,0,128]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,128,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
@@ -638,7 +638,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [37,37,37,37,37,37,37,37]
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; SSE2-NEXT: pmullw %xmm3, %xmm2
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm4
@@ -690,7 +690,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [37,37,37,37,37,37,37,37]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@@ -763,23 +763,23 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,256,256,256,256,256,256,256]
; SSE2-NEXT: psrlw $8, %xmm2
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,79,171,117,205,57,57,37]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,256,256,256,256,256,256,128]
; SSE2-NEXT: psrlw $8, %xmm3
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,32,57,205,117,171,79,147]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; SSE2-NEXT: psrlw $8, %xmm3
; SSE2-NEXT: packuswb %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psubb %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [0,0,0,128,0,0,0,128]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; SSE2-NEXT: psrlw $8, %xmm4
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,128,0,0,0]
+; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: packuswb %xmm4, %xmm2
; SSE2-NEXT: paddb %xmm3, %xmm2
@@ -809,7 +809,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: psllw $7, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; SSE41-NEXT: psrlw $8, %xmm3
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,79,171,117,205,57,57,37]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -817,16 +817,16 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: psllw $7, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7]
; SSE41-NEXT: psrlw $8, %xmm4
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,32,57,205,117,171,79,147]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; SSE41-NEXT: psrlw $8, %xmm4
; SSE41-NEXT: packuswb %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psubb %xmm4, %xmm2
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,128,0,0,0,128]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [128,0,0,0,128,0,0,0]
+; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: packuswb %xmm2, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
@@ -854,22 +854,22 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,79,171,117,205,57,57,37]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,0,79,0,171,0,117,0,205,0,57,0,57,0,37,0]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37,32,57,205,117,171,79,147]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,128,0,0,0,128]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [128,0,0,0,128,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
index e43108f..98ea87c 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -166,7 +166,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [37,37,37,37,37,37,37,37]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@@ -200,7 +200,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
@@ -246,22 +246,22 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [147,79,171,117,205,57,32,37]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm4[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,135,205,27,57,241,16,137]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,128,0,0,0,128]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,128,0,0,0,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
@@ -276,22 +276,22 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [137,16,241,57,27,205,135,187]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [37,32,57,205,117,171,79,147]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,0,0,0,128,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,128,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
@@ -312,20 +312,20 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [256,256,256,256,256,256,256,128,128,256,256,256,256,256,256,256]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,16,241,57,27,205,135,187,147,79,171,117,205,57,32,37]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm4, %ymm3
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,32,57,205,117,171,79,147,187,135,205,27,57,241,16,137]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,0,0,0,0,0,128,0,0,0,0,128,0,0,0,128]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,128,0,0,0,0,128,0,0,0,0,0,0]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2NOBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
; AVX2NOBW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
@@ -578,7 +578,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [37,37,37,37,37,37,37,37]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
@@ -622,7 +622,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW: # %bb.0:
; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
@@ -676,22 +676,22 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [147,79,171,117,205,57,32,37]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm4[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,135,205,27,57,241,16,137]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [0,0,0,128,0,0,0,128]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,128,0,0,0,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
@@ -713,22 +713,22 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [137,16,241,57,27,205,135,187]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpsllw $7, %xmm6, %xmm6
; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3,4,5,6],xmm6[7]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [37,32,57,205,117,171,79,147]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm5[8],xmm1[8],xmm5[9],xmm1[9],xmm5[10],xmm1[10],xmm5[11],xmm1[11],xmm5[12],xmm1[12],xmm5[13],xmm1[13],xmm5[14],xmm1[14],xmm5[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [0,0,0,0,0,0,128,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [128,0,0,0,128,0,0,0]
+; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0]
; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT: vpackuswb %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpaddb %xmm4, %xmm5, %xmm4
@@ -755,20 +755,20 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [256,256,256,256,256,256,256,128,128,256,256,256,256,256,256,256]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,16,241,57,27,205,135,187,147,79,171,117,205,57,32,37]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm4, %ymm3
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,32,57,205,117,171,79,147,187,135,205,27,57,241,16,137]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm0, %ymm3
; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8],ymm1[8],ymm3[9],ymm1[9],ymm3[10],ymm1[10],ymm3[11],ymm1[11],ymm3[12],ymm1[12],ymm3[13],ymm1[13],ymm3[14],ymm1[14],ymm3[15],ymm1[15],ymm3[24],ymm1[24],ymm3[25],ymm1[25],ymm3[26],ymm1[26],ymm3[27],ymm1[27],ymm3[28],ymm1[28],ymm3[29],ymm1[29],ymm3[30],ymm1[30],ymm3[31],ymm1[31]
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,128,0,0,0,0,128,0,0,0,128]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm1[0],ymm3[1],ymm1[1],ymm3[2],ymm1[2],ymm3[3],ymm1[3],ymm3[4],ymm1[4],ymm3[5],ymm1[5],ymm3[6],ymm1[6],ymm3[7],ymm1[7],ymm3[16],ymm1[16],ymm3[17],ymm1[17],ymm3[18],ymm1[18],ymm3[19],ymm1[19],ymm3[20],ymm1[20],ymm3[21],ymm1[21],ymm3[22],ymm1[22],ymm3[23],ymm1[23]
-; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [128,0,0,0,128,0,0,0,0,128,0,0,0,0,0,0]
+; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpackuswb %ymm4, %ymm3, %ymm3
; AVX2NOBW-NEXT: vpaddb %ymm2, %ymm3, %ymm2
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
index bf98bcc..a11fa370 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -135,7 +135,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
@@ -169,7 +169,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX512BW-NEXT: vpmullw %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
@@ -199,20 +199,20 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31]
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,256,64,256,256,256,256,256,128,256,256,256,256,256,256,256]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,0,27,0,37,0,19,0,79,0,41,0,171,0,101,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,256,128,256,256,256,256,256,256,256]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,0,111,0,57,0,235,0,241,0,249,0,8,0,9,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31]
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpackuswb %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2
@@ -226,20 +226,20 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,256,256,256,256,256,256,128,256,256,256,256,256,256,256,256]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,9,0,8,0,249,0,241,0,235,0,57,0,111,0,27,0]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,128,256,256,256,256,256,64,256,256]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,101,0,171,0,41,0,79,0,19,0,37,0,27,0,137,0]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpsubb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0]
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0
@@ -259,20 +259,20 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27,137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37]
+; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,9,0,8,0,249,0,241,0,235,0,57,0,111,0,27,0,137,0,27,0,37,0,19,0,79,0,41,0,171,0,101,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
-; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137,27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137]
+; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,101,0,171,0,41,0,79,0,19,0,37,0,27,0,137,0,27,0,111,0,57,0,235,0,241,0,249,0,8,0,9,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
-; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0,0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128]
+; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
-; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0,0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0]
+; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: vpackuswb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
@@ -473,7 +473,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
@@ -517,7 +517,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0,37,0]
; AVX512BW-NEXT: vpmullw %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
@@ -551,20 +551,20 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31]
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,256,64,256,256,256,256,256,128,256,256,256,256,256,256,256]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,0,27,0,37,0,19,0,79,0,41,0,171,0,101,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,256,128,256,256,256,256,256,256,256]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,0,111,0,57,0,235,0,241,0,249,0,8,0,9,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm4
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15],ymm4[24],ymm1[24],ymm4[25],ymm1[25],ymm4[26],ymm1[26],ymm4[27],ymm1[27],ymm4[28],ymm1[28],ymm4[29],ymm1[29],ymm4[30],ymm1[30],ymm4[31],ymm1[31]
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[16],ymm1[16],ymm4[17],ymm1[17],ymm4[18],ymm1[18],ymm4[19],ymm1[19],ymm4[20],ymm1[20],ymm4[21],ymm1[21],ymm4[22],ymm1[22],ymm4[23],ymm1[23]
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm3, %ymm4, %ymm3
@@ -585,20 +585,20 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,128,256,256,256,256,256,256,256,256]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,9,0,8,0,249,0,241,0,235,0,57,0,111,0,27,0]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [256,256,256,256,256,256,256,128,256,256,256,256,256,64,256,256]
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,101,0,171,0,41,0,79,0,19,0,37,0,27,0,137,0]
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpsubb %ymm4, %ymm0, %ymm5
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm5[8],ymm1[8],ymm5[9],ymm1[9],ymm5[10],ymm1[10],ymm5[11],ymm1[11],ymm5[12],ymm1[12],ymm5[13],ymm1[13],ymm5[14],ymm1[14],ymm5[15],ymm1[15],ymm5[24],ymm1[24],ymm5[25],ymm1[25],ymm5[26],ymm1[26],ymm5[27],ymm1[27],ymm5[28],ymm1[28],ymm5[29],ymm1[29],ymm5[30],ymm1[30],ymm5[31],ymm1[31]
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm5[0],ymm1[0],ymm5[1],ymm1[1],ymm5[2],ymm1[2],ymm5[3],ymm1[3],ymm5[4],ymm1[4],ymm5[5],ymm1[5],ymm5[6],ymm1[6],ymm5[7],ymm1[7],ymm5[16],ymm1[16],ymm5[17],ymm1[17],ymm5[18],ymm1[18],ymm5[19],ymm1[19],ymm5[20],ymm1[20],ymm5[21],ymm1[21],ymm5[22],ymm1[22],ymm5[23],ymm1[23]
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0]
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0]
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm6, %ymm5, %ymm5
; AVX512F-NEXT: vpaddb %ymm4, %ymm5, %ymm4
@@ -624,20 +624,20 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27,137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37]
+; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,0,16,0,241,0,57,0,27,0,205,0,135,0,187,0,9,0,8,0,249,0,241,0,235,0,57,0,111,0,27,0,137,0,27,0,37,0,19,0,79,0,41,0,171,0,101,0,147,0,79,0,171,0,117,0,205,0,57,0,32,0,37,0]
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
-; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137,27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137]
+; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,0,32,0,57,0,205,0,117,0,171,0,79,0,147,0,101,0,171,0,41,0,79,0,19,0,37,0,27,0,137,0,27,0,111,0,57,0,235,0,241,0,249,0,8,0,9,0,187,0,135,0,205,0,27,0,57,0,241,0,16,0,137,0]
; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm3
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm3[8],zmm1[8],zmm3[9],zmm1[9],zmm3[10],zmm1[10],zmm3[11],zmm1[11],zmm3[12],zmm1[12],zmm3[13],zmm1[13],zmm3[14],zmm1[14],zmm3[15],zmm1[15],zmm3[24],zmm1[24],zmm3[25],zmm1[25],zmm3[26],zmm1[26],zmm3[27],zmm1[27],zmm3[28],zmm1[28],zmm3[29],zmm1[29],zmm3[30],zmm1[30],zmm3[31],zmm1[31],zmm3[40],zmm1[40],zmm3[41],zmm1[41],zmm3[42],zmm1[42],zmm3[43],zmm1[43],zmm3[44],zmm1[44],zmm3[45],zmm1[45],zmm3[46],zmm1[46],zmm3[47],zmm1[47],zmm3[56],zmm1[56],zmm3[57],zmm1[57],zmm3[58],zmm1[58],zmm3[59],zmm1[59],zmm3[60],zmm1[60],zmm3[61],zmm1[61],zmm3[62],zmm1[62],zmm3[63],zmm1[63]
-; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0,0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128]
+; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4 # [0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0]
; AVX512BW-NEXT: vpsrlw $8, %zmm4, %zmm4
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm3[0],zmm1[0],zmm3[1],zmm1[1],zmm3[2],zmm1[2],zmm3[3],zmm1[3],zmm3[4],zmm1[4],zmm3[5],zmm1[5],zmm3[6],zmm1[6],zmm3[7],zmm1[7],zmm3[16],zmm1[16],zmm3[17],zmm1[17],zmm3[18],zmm1[18],zmm3[19],zmm1[19],zmm3[20],zmm1[20],zmm3[21],zmm1[21],zmm3[22],zmm1[22],zmm3[23],zmm1[23],zmm3[32],zmm1[32],zmm3[33],zmm1[33],zmm3[34],zmm1[34],zmm3[35],zmm1[35],zmm3[36],zmm1[36],zmm3[37],zmm1[37],zmm3[38],zmm1[38],zmm3[39],zmm1[39],zmm3[48],zmm1[48],zmm3[49],zmm1[49],zmm3[50],zmm1[50],zmm3[51],zmm1[51],zmm3[52],zmm1[52],zmm3[53],zmm1[53],zmm3[54],zmm1[54],zmm3[55],zmm1[55]
-; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0,0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0]
+; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,0,0,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
; AVX512BW-NEXT: vpackuswb %zmm4, %zmm3, %zmm3
; AVX512BW-NEXT: vpaddb %zmm2, %zmm3, %zmm2
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index 6e1bf25..d0bb90c 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -130,31 +130,31 @@ define <4 x i32> @mul_v4i32_1_2_4_8(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: mul_v4i32_1_2_4_8:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [2,u,8,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: retl
;
; X86-SSE4-LABEL: mul_v4i32_1_2_4_8:
; X86-SSE4: # %bb.0:
-; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8]
; X86-SSE4-NEXT: retl
;
; X64-SSE2-LABEL: mul_v4i32_1_2_4_8:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,u,8,u]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE2-NEXT: retq
;
; X64-SSE4-LABEL: mul_v4i32_1_2_4_8:
; X64-SSE4: # %bb.0:
-; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8]
; X64-SSE4-NEXT: retq
;
; X64-XOP-LABEL: mul_v4i32_1_2_4_8:
@@ -190,12 +190,12 @@ define <4 x i32> @mul_v4i32_1_2_4_8_optsize(<4 x i32> %a0) nounwind optsize {
;
; X86-SSE4-LABEL: mul_v4i32_1_2_4_8_optsize:
; X86-SSE4: # %bb.0:
-; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8]
; X86-SSE4-NEXT: retl
;
; X64-SSE4-LABEL: mul_v4i32_1_2_4_8_optsize:
; X64-SSE4: # %bb.0:
-; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8]
; X64-SSE4-NEXT: retq
;
; X64-XOP-LABEL: mul_v4i32_1_2_4_8_optsize:
@@ -989,7 +989,7 @@ define <2 x i64> @mul_v2i64_17_65(<2 x i64> %a0) nounwind {
;
; X64-AVX512DQ-LABEL: mul_v2i64_17_65:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [17,65]
; X64-AVX512DQ-NEXT: retq
%1 = mul <2 x i64> %a0, <i64 17, i64 65>
ret <2 x i64> %1
@@ -999,36 +999,36 @@ define <4 x i32> @mul_v4i32_5_17_33_65(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: mul_v4i32_5_17_33_65:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [5,17,33,65]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [17,u,65,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: retl
;
; X86-SSE4-LABEL: mul_v4i32_5_17_33_65:
; X86-SSE4: # %bb.0:
-; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [5,17,33,65]
; X86-SSE4-NEXT: retl
;
; X64-SSE2-LABEL: mul_v4i32_5_17_33_65:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [5,17,33,65]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [17,u,65,u]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE2-NEXT: retq
;
; X64-SSE4-LABEL: mul_v4i32_5_17_33_65:
; X64-SSE4: # %bb.0:
-; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [5,17,33,65]
; X64-SSE4-NEXT: retq
;
; X64-AVX-LABEL: mul_v4i32_5_17_33_65:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [5,17,33,65]
; X64-AVX-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 5, i32 17, i32 33, i32 65>
ret <4 x i32> %1
@@ -1384,7 +1384,7 @@ define <2 x i64> @mul_v2i64_15_63(<2 x i64> %a0) nounwind {
;
; X64-AVX512DQ-LABEL: mul_v2i64_15_63:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [15,63]
; X64-AVX512DQ-NEXT: retq
%1 = mul <2 x i64> %a0, <i64 15, i64 63>
ret <2 x i64> %1
@@ -1427,7 +1427,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: psrlq $32, %xmm3
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm3
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967295]
; X64-SSE2-NEXT: paddq %xmm3, %xmm0
; X64-SSE2-NEXT: psllq $32, %xmm0
; X64-SSE2-NEXT: paddq %xmm2, %xmm0
@@ -1441,7 +1441,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
; X64-SSE4-NEXT: movdqa %xmm0, %xmm3
; X64-SSE4-NEXT: psrlq $32, %xmm3
; X64-SSE4-NEXT: pmuludq %xmm1, %xmm3
-; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967295]
; X64-SSE4-NEXT: paddq %xmm3, %xmm0
; X64-SSE4-NEXT: psllq $32, %xmm0
; X64-SSE4-NEXT: paddq %xmm2, %xmm0
@@ -1453,7 +1453,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967295]
; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -1465,7 +1465,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967295]
; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -1473,7 +1473,7 @@ define <2 x i64> @mul_v2i64_neg_15_63(<2 x i64> %a0) nounwind {
;
; X64-AVX512DQ-LABEL: mul_v2i64_neg_15_63:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18446744073709551601,18446744073709551553]
; X64-AVX512DQ-NEXT: retq
%1 = mul <2 x i64> %a0, <i64 -15, i64 -63>
ret <2 x i64> %1
@@ -1516,7 +1516,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: psrlq $32, %xmm3
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm3
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967295]
; X64-SSE2-NEXT: paddq %xmm3, %xmm0
; X64-SSE2-NEXT: psllq $32, %xmm0
; X64-SSE2-NEXT: paddq %xmm2, %xmm0
@@ -1530,7 +1530,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
; X64-SSE4-NEXT: movdqa %xmm0, %xmm3
; X64-SSE4-NEXT: psrlq $32, %xmm3
; X64-SSE4-NEXT: pmuludq %xmm1, %xmm3
-; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4294967295,4294967295]
; X64-SSE4-NEXT: paddq %xmm3, %xmm0
; X64-SSE4-NEXT: psllq $32, %xmm0
; X64-SSE4-NEXT: paddq %xmm2, %xmm0
@@ -1542,7 +1542,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967295]
; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -1554,7 +1554,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4294967295,4294967295]
; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -1562,7 +1562,7 @@ define <2 x i64> @mul_v2i64_neg_17_65(<2 x i64> %a0) nounwind {
;
; X64-AVX512DQ-LABEL: mul_v2i64_neg_17_65:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18446744073709551599,18446744073709551551]
; X64-AVX512DQ-NEXT: retq
%1 = mul <2 x i64> %a0, <i64 -17, i64 -65>
ret <2 x i64> %1
@@ -1600,7 +1600,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: psrlq $32, %xmm3
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,0,u,u,u,u,255,255,255,255,u,u,u,u]
; X86-SSE2-NEXT: paddq %xmm3, %xmm0
; X86-SSE2-NEXT: psllq $32, %xmm0
; X86-SSE2-NEXT: paddq %xmm2, %xmm0
@@ -1614,7 +1614,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X86-SSE4-NEXT: movdqa %xmm0, %xmm3
; X86-SSE4-NEXT: psrlq $32, %xmm3
; X86-SSE4-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,0,u,u,u,u,255,255,255,255,u,u,u,u]
; X86-SSE4-NEXT: paddq %xmm3, %xmm0
; X86-SSE4-NEXT: psllq $32, %xmm0
; X86-SSE4-NEXT: paddq %xmm2, %xmm0
@@ -1628,7 +1628,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: psrlq $32, %xmm3
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm3
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0]
; X64-SSE2-NEXT: paddq %xmm3, %xmm0
; X64-SSE2-NEXT: psllq $32, %xmm0
; X64-SSE2-NEXT: paddq %xmm2, %xmm0
@@ -1642,7 +1642,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X64-SSE4-NEXT: movdqa %xmm0, %xmm3
; X64-SSE4-NEXT: psrlq $32, %xmm3
; X64-SSE4-NEXT: pmuludq %xmm1, %xmm3
-; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0]
; X64-SSE4-NEXT: paddq %xmm3, %xmm0
; X64-SSE4-NEXT: psllq $32, %xmm0
; X64-SSE4-NEXT: paddq %xmm2, %xmm0
@@ -1654,7 +1654,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0]
; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -1666,7 +1666,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0]
; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -1674,7 +1674,7 @@ define <2 x i64> @mul_v2i64_neg_0_1(<2 x i64> %a0) nounwind {
;
; X64-AVX512DQ-LABEL: mul_v2i64_neg_0_1:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; X64-AVX512DQ-NEXT: retq
%1 = mul <2 x i64> %a0, <i64 0, i64 -1>
ret <2 x i64> %1
@@ -1689,7 +1689,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: psrlq $32, %xmm3
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,0,u,u,u,u,255,255,255,255,u,u,u,u]
; X86-SSE2-NEXT: paddq %xmm3, %xmm0
; X86-SSE2-NEXT: psllq $32, %xmm0
; X86-SSE2-NEXT: paddq %xmm2, %xmm0
@@ -1703,7 +1703,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X86-SSE4-NEXT: movdqa %xmm0, %xmm3
; X86-SSE4-NEXT: psrlq $32, %xmm3
; X86-SSE4-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,0,u,u,u,u,255,255,255,255,u,u,u,u]
; X86-SSE4-NEXT: paddq %xmm3, %xmm0
; X86-SSE4-NEXT: psllq $32, %xmm0
; X86-SSE4-NEXT: paddq %xmm2, %xmm0
@@ -1717,7 +1717,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: psrlq $32, %xmm3
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm3
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0]
; X64-SSE2-NEXT: paddq %xmm3, %xmm0
; X64-SSE2-NEXT: psllq $32, %xmm0
; X64-SSE2-NEXT: paddq %xmm2, %xmm0
@@ -1731,7 +1731,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X64-SSE4-NEXT: movdqa %xmm0, %xmm3
; X64-SSE4-NEXT: psrlq $32, %xmm3
; X64-SSE4-NEXT: pmuludq %xmm1, %xmm3
-; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE4-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0]
; X64-SSE4-NEXT: paddq %xmm3, %xmm0
; X64-SSE4-NEXT: psllq $32, %xmm0
; X64-SSE4-NEXT: paddq %xmm2, %xmm0
@@ -1743,7 +1743,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-XOP-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-XOP-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-XOP-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0]
; X64-XOP-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-XOP-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -1755,7 +1755,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm3
; X64-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,0,0,0,0,0,0,255,255,255,255,0,0,0,0]
; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
@@ -1763,7 +1763,7 @@ define <2 x i64> @mul_v2i64_15_neg_63(<2 x i64> %a0) nounwind {
;
; X64-AVX512DQ-LABEL: mul_v2i64_15_neg_63:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [15,18446744073709551553]
; X64-AVX512DQ-NEXT: retq
%1 = mul <2 x i64> %a0, <i64 15, i64 -63>
ret <2 x i64> %1
@@ -1773,36 +1773,36 @@ define <4 x i32> @mul_v4i32_0_15_31_7(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: mul_v4i32_0_15_31_7:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,15,31,7]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [15,u,7,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: retl
;
; X86-SSE4-LABEL: mul_v4i32_0_15_31_7:
; X86-SSE4: # %bb.0:
-; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,15,31,7]
; X86-SSE4-NEXT: retl
;
; X64-SSE2-LABEL: mul_v4i32_0_15_31_7:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,15,31,7]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [15,u,7,u]
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE2-NEXT: retq
;
; X64-SSE4-LABEL: mul_v4i32_0_15_31_7:
; X64-SSE4: # %bb.0:
-; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,15,31,7]
; X64-SSE4-NEXT: retq
;
; X64-AVX-LABEL: mul_v4i32_0_15_31_7:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,15,31,7]
; X64-AVX-NEXT: retq
%1 = mul <4 x i32> %a0, <i32 0, i32 15, i32 31, i32 7>
ret <4 x i32> %1
@@ -1947,7 +1947,7 @@ define <2 x i64> @mul_v2i64_68_132(<2 x i64> %x) nounwind {
;
; X64-AVX512DQ-LABEL: mul_v2i64_68_132:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [68,132]
; X64-AVX512DQ-NEXT: retq
%mul = mul <2 x i64> %x, <i64 68, i64 132>
ret <2 x i64> %mul
@@ -2009,7 +2009,7 @@ define <2 x i64> @mul_v2i64_60_120(<2 x i64> %x) nounwind {
;
; X64-AVX512DQ-LABEL: mul_v2i64_60_120:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [60,124]
; X64-AVX512DQ-NEXT: retq
%mul = mul <2 x i64> %x, <i64 60, i64 124>
ret <2 x i64> %mul
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 93f4ce7..0bf5a8d 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -1092,9 +1092,9 @@ define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: constant_rotate_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1106,8 +1106,8 @@ define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind {
; SSE41-LABEL: constant_rotate_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u]
+; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1118,8 +1118,8 @@ define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind {
; AVX1-LABEL: constant_rotate_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,128,u]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
@@ -1156,9 +1156,9 @@ define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind {
; X86-SSE2-LABEL: constant_rotate_v4i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,64,128]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [32,u,128,u]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index 64c3118..5ae3e2f 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -895,13 +895,13 @@ define <8 x i32> @constant_rotate_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: constant_rotate_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,u,128,u]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [512,u,2048,u]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [256,512,1024,2048]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovshdup {{.*#+}} ymm2 = ymm0[1,1,3,3,5,5,7,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7]
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index 99dac74..3085c32 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -987,21 +987,21 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: constant_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,u,128,u]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_shift_v4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16,32,64,128]
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_shift_v4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16,32,64,128]
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v4i32:
@@ -1032,9 +1032,9 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
; X86-SSE-LABEL: constant_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [16,32,64,128]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [32,u,128,u]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index b56a8b5..f9ccd1e 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -1117,9 +1117,9 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: constant_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [16,32,64,128]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,512,256,128]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1153,9 +1153,9 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
;
; X86-AVX1-LABEL: constant_shift_v8i32:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 # [16,32,64,128]
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [256,512,256,128]
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index 0e20b18..18d79b6 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -778,9 +778,9 @@ define <16 x i8> @combine_shl_pshufb(<4 x i32> %a0) {
; SSSE3-LABEL: combine_shl_pshufb:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,256,65536,65536]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,u,65536,u]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
@@ -788,13 +788,13 @@ define <16 x i8> @combine_shl_pshufb(<4 x i32> %a0) {
;
; SSE41-LABEL: combine_shl_pshufb:
; SSE41: # %bb.0:
-; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,256,65536,65536]
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_shl_pshufb:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,256,65536,65536]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll
index 1af7542..4235377 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-math.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll
@@ -2110,7 +2110,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,3]
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
@@ -2119,7 +2119,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -2127,7 +2127,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; AVX2-SLOW-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3]
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
@@ -2135,7 +2135,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,2,4,6,0,0,0,0]
; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
-; AVX2-FAST-ALL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-FAST-ALL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3]
; AVX2-FAST-ALL-NEXT: vzeroupper
; AVX2-FAST-ALL-NEXT: retq
;
@@ -2143,7 +2143,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; AVX2-FAST-PERLANE-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-FAST-PERLANE-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3]
; AVX2-FAST-PERLANE-NEXT: vzeroupper
; AVX2-FAST-PERLANE-NEXT: retq
;
@@ -2151,7 +2151,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = mul <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
@@ -2253,13 +2253,13 @@ define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_mul_const_v16i64_v16i8:
; SSE: # %bb.0:
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,3]
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [4,5]
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [6,7]
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [8,9]
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [10,11]
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [12,13]
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7 # [14,15]
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
@@ -2280,18 +2280,18 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX1-LABEL: trunc_mul_const_v16i64_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 # [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm5
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2,3]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm5 # [4,5]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm6
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [6,7]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm6 # [8,9]
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm7
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [10,11]
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm7 # [12,13]
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [14,15]
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm8 = [255,255]
; AVX1-NEXT: vpand %xmm3, %xmm8, %xmm3
; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm7
@@ -2313,10 +2313,10 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX2-LABEL: trunc_mul_const_v16i64_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,3]
+; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [4,5,6,7]
+; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [8,9,10,11]
+; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [12,13,14,15]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
@@ -2335,8 +2335,8 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX512F-LABEL: trunc_mul_const_v16i64_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512F-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512F-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,1,2,3,4,5,6,7]
+; AVX512F-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [8,9,10,11,12,13,14,15]
; AVX512F-NEXT: vpmovqb %zmm1, %xmm1
; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -2345,8 +2345,8 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX512BW-LABEL: trunc_mul_const_v16i64_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,1,2,3,4,5,6,7]
+; AVX512BW-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [8,9,10,11,12,13,14,15]
; AVX512BW-NEXT: vpmovqb %zmm1, %xmm1
; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -2355,8 +2355,8 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX512DQ-LABEL: trunc_mul_const_v16i64_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,1,2,3,4,5,6,7]
+; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT: vpmovqb %zmm1, %xmm1
; AVX512DQ-NEXT: vpmovqb %zmm0, %xmm0
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -2371,27 +2371,27 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; SSE-LABEL: trunc_mul_const_v16i32_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,1,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [1,u,3,u]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [5,u,7,u]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [8,9,10,11]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [9,u,11,u]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [12,13,14,15]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [13,u,15,u]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
@@ -2406,12 +2406,12 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX1-LABEL: trunc_mul_const_v16i32_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [0,1,2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,5,6,7]
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3 # [8,9,10,11]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [12,13,14,15]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [255,255,255,255]
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
@@ -2425,8 +2425,8 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX2-LABEL: trunc_mul_const_v16i32_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,3,4,5,6,7]
+; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [8,9,10,11,12,13,14,15]
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -2439,7 +2439,7 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX512-LABEL: trunc_mul_const_v16i32_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index 17315c4..ac330a7 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -95,7 +95,7 @@ bb:
define void @test3(<4 x i32> %induction30, ptr %tmp16, ptr %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) {
; AVX1-LABEL: test3:
; AVX1: ## %bb.0:
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## [2863311531,2863311531,2863311531,2863311531]
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
@@ -151,23 +151,19 @@ define <32 x i8> @PR22706(<32 x i1> %x) {
; AVX1: ## %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsllw $7, %xmm1, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR22706:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll
index 8543e9f..16700d4 100644
--- a/llvm/test/CodeGen/X86/vselect-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -1046,7 +1046,7 @@ define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z
define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX1-LABEL: blend_mask_cond_v4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,4194304,1073741824,2147483648]
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT: retq
;
@@ -1211,9 +1211,9 @@ define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z
define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: blend_mask_cond_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 # [2147483648,1073741824,268435456,536870912]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [268435456,2097152,1073741824,524288]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs-simd.ll b/llvm/test/CodeGen/X86/zero-call-used-regs-simd.ll
new file mode 100644
index 0000000..d9253e0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/zero-call-used-regs-simd.ll
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -verify-machineinstrs | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 -verify-machineinstrs | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl,+avx512bw -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512BW
+
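+; Attribute #0 at the bottom of this file applies "zero-call-used-regs"="used",
+; which makes the compiler zero every call-clobbered register the function
+; actually used immediately before returning.
+;
+; Plain vector store: xmm0 carries %arg, so it is the only vector register
+; that has to be zeroed on return.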
+define void @zero_xmm(<4 x i32> %arg) #0 {
+; SSE-LABEL: zero_xmm:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps %xmm0, 0
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: zero_xmm:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovaps %xmm0, 0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: zero_xmm:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovaps %xmm0, 0
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ store <4 x i32> %arg, ptr null, align 32
+ ret void
+}
+
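+; 256-bit store: under SSE the value arrives split across xmm0/xmm1 and both
+; halves are zeroed; under AVX a single vxorps of xmm0 suffices because a
+; VEX-encoded write to an xmm register also clears the upper ymm bits.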
+define void @zero_ymm(<8 x i32> %arg) #0 {
+; SSE-LABEL: zero_ymm:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps %xmm1, 16
+; SSE-NEXT: movaps %xmm0, 0
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: zero_ymm:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovaps %ymm0, 0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: zero_ymm:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovaps %ymm0, 0
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ store <8 x i32> %arg, ptr null, align 32
+ ret void
+}
+
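+; 512-bit store: SSE splits the value into four xmm stores and zeroes
+; xmm0-xmm3; AVX512 uses an unaligned zmm store (the IR only guarantees
+; align 32) and one vxorps of xmm0, which clears all of zmm0.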
+define void @zero_zmm(<16 x i32> %arg) #0 {
+; SSE-LABEL: zero_zmm:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps %xmm3, 48
+; SSE-NEXT: movaps %xmm2, 32
+; SSE-NEXT: movaps %xmm1, 16
+; SSE-NEXT: movaps %xmm0, 0
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: xorps %xmm3, %xmm3
+; SSE-NEXT: retq
+;
+; AVX-LABEL: zero_zmm:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovaps %ymm1, 32
+; AVX-NEXT: vmovaps %ymm0, 0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: zero_zmm:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovups %zmm0, 0
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ store <16 x i32> %arg, ptr null, align 32
+ ret void
+}
+
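+; Masked store: in addition to the vector registers, the AVX512 variants use
+; the mask register k1, which is zeroed with kxorw/kxorq (k0 xor k0 is always
+; zero). Note that the AVX1/AVX2/AVX512VL outputs currently zero some xmm
+; registers twice, seemingly because the xmm and ymm views of a register are
+; tracked as separate entries in the used set.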
+define void @zero_k(<8 x i32> %arg, <8 x i1> %mask) #0 {
+; SSE-LABEL: zero_k:
+; SSE: # %bb.0:
+; SSE-NEXT: psllw $15, %xmm2
+; SSE-NEXT: packsswb %xmm2, %xmm2
+; SSE-NEXT: pmovmskb %xmm2, %eax
+; SSE-NEXT: testb $1, %al
+; SSE-NEXT: jne .LBB3_1
+; SSE-NEXT: # %bb.2: # %else
+; SSE-NEXT: testb $2, %al
+; SSE-NEXT: jne .LBB3_3
+; SSE-NEXT: .LBB3_4: # %else2
+; SSE-NEXT: testb $4, %al
+; SSE-NEXT: jne .LBB3_5
+; SSE-NEXT: .LBB3_6: # %else4
+; SSE-NEXT: testb $8, %al
+; SSE-NEXT: jne .LBB3_7
+; SSE-NEXT: .LBB3_8: # %else6
+; SSE-NEXT: testb $16, %al
+; SSE-NEXT: jne .LBB3_9
+; SSE-NEXT: .LBB3_10: # %else8
+; SSE-NEXT: testb $32, %al
+; SSE-NEXT: jne .LBB3_11
+; SSE-NEXT: .LBB3_12: # %else10
+; SSE-NEXT: testb $64, %al
+; SSE-NEXT: jne .LBB3_13
+; SSE-NEXT: .LBB3_14: # %else12
+; SSE-NEXT: testb $-128, %al
+; SSE-NEXT: je .LBB3_16
+; SSE-NEXT: .LBB3_15: # %cond.store13
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
+; SSE-NEXT: movd %xmm0, 28
+; SSE-NEXT: .LBB3_16: # %else14
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm2
+; SSE-NEXT: retq
+; SSE-NEXT: .LBB3_1: # %cond.store
+; SSE-NEXT: movd %xmm0, 0
+; SSE-NEXT: testb $2, %al
+; SSE-NEXT: je .LBB3_4
+; SSE-NEXT: .LBB3_3: # %cond.store1
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
+; SSE-NEXT: movd %xmm2, 4
+; SSE-NEXT: testb $4, %al
+; SSE-NEXT: je .LBB3_6
+; SSE-NEXT: .LBB3_5: # %cond.store3
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
+; SSE-NEXT: movd %xmm2, 8
+; SSE-NEXT: testb $8, %al
+; SSE-NEXT: je .LBB3_8
+; SSE-NEXT: .LBB3_7: # %cond.store5
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSE-NEXT: movd %xmm0, 12
+; SSE-NEXT: testb $16, %al
+; SSE-NEXT: je .LBB3_10
+; SSE-NEXT: .LBB3_9: # %cond.store7
+; SSE-NEXT: movd %xmm1, 16
+; SSE-NEXT: testb $32, %al
+; SSE-NEXT: je .LBB3_12
+; SSE-NEXT: .LBB3_11: # %cond.store9
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
+; SSE-NEXT: movd %xmm0, 20
+; SSE-NEXT: testb $64, %al
+; SSE-NEXT: je .LBB3_14
+; SSE-NEXT: .LBB3_13: # %cond.store11
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; SSE-NEXT: movd %xmm0, 24
+; SSE-NEXT: testb $-128, %al
+; SSE-NEXT: jne .LBB3_15
+; SSE-NEXT: jmp .LBB3_16
+;
+; AVX1-LABEL: zero_k:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vmaskmovps %ymm0, %ymm1, 0
+; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: zero_k:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
+; AVX2-NEXT: vpmaskmovd %ymm0, %ymm1, 0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: zero_k:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX512VL-NEXT: vpslld $31, %ymm1, %ymm1
+; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
+; AVX512VL-NEXT: vmovdqa32 %ymm0, 0 {%k1}
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: kxorw %k0, %k0, %k1
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: zero_k:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsllw $15, %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovw2m %xmm1, %k1
+; AVX512BW-NEXT: vmovdqa32 %ymm0, 0 {%k1}
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: kxorq %k0, %k0, %k1
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+ tail call void @llvm.masked.store.v8i32.p0(<8 x i32> %arg, ptr null, i32 32, <8 x i1> %mask)
+ ret void
+}
+
+attributes #0 = { "zero-call-used-regs"="used" }
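+
+; For reference, the same behaviour can be requested from C with
+; __attribute__((zero_call_used_regs("used"))) or for a whole translation
+; unit with -fzero-call-used-regs=used; "used" only zeroes registers the
+; function touched, while "all" zeroes every call-clobbered register.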