diff options
Diffstat (limited to 'llvm/test')
124 files changed, 9667 insertions, 3120 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll index 1f5844b..c7635e8 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll @@ -5,105 +5,105 @@ define void @casts() { ; CHECK-LABEL: 'casts' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) @@ -275,51 +275,51 @@ define void @fp16() { ; ; CHECK-FP16-LABEL: 'fp16' ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll index ca4f51f..10973bb 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll @@ -5,57 +5,63 @@ ; Verify the cost model for reverse shuffles. ; -define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128) { -; CHECK-LABEL: 'test_vXi32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +;; Reverse shuffles should be lowered to vrev and possibly a vext (for quadwords, on neon) +define void @reverse() { +; CHECK-LABEL: 'reverse' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0> - %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> - ret void -} + %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0> + %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -define void @test_vXi64(<2 x i64> %src128) { -; CHECK-LABEL: 'test_vXi64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; - %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0> - ret void -} + %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> + %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -define void @test_vXf32(<2 x float> %src64, <4 x float> %src128) { -; CHECK-LABEL: 'test_vXf32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; - %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0> - %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> - ret void -} + %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> + %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -define void @test_vXf64(<2 x double> %src128) { -; CHECK-LABEL: 'test_vXf64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; - %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> - ret void -} + %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> + %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; -; Tests the cost model for reverse shuffles of second operand. -; + %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> + %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + + %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> + %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + + %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> + %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128) { -; CHECK-LABEL: 'test_upper_vXf32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; - %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> - %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> ret void } + + diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index 1859309..d0c987f 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -281,7 +281,7 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) { ; THRU-LABEL: 'maskedgather' -; THRU-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedgather' @@ -302,7 +302,7 @@ define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) { define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) { ; THRU-LABEL: 'maskedscatter' -; THRU-NEXT: Cost Model: Found an estimated cost of 92 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedscatter' diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll index 36d027b..0dc3f98 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -882,85 +882,139 @@ define i32 @masked_expandload() { define i32 @masked_compressstore() { ; SSE2-LABEL: 'masked_compressstore' -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SSE42-LABEL: 'masked_compressstore' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; -; AVX-LABEL: 'masked_compressstore' -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; AVX1-LABEL: 'masked_compressstore' +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_compressstore' +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_compressstore' +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_compressstore' ; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) @@ -1242,19 +1296,19 @@ define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) { ; SSE2-LABEL: 'test_gather_2f64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; SSE42-LABEL: 'test_gather_2f64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; AVX1-LABEL: 'test_gather_2f64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; AVX2-LABEL: 'test_gather_2f64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; SKL-LABEL: 'test_gather_2f64' @@ -1271,19 +1325,19 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32' @@ -1383,25 +1437,25 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, < ; SSE2-LABEL: 'test_gather_16f32_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_var_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1427,25 +1481,25 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3 ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1532,7 +1586,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_16i32' @@ -1540,7 +1594,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_scatter_16i32' @@ -1548,7 +1602,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_scatter_16i32' @@ -1556,7 +1610,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKL-LABEL: 'test_scatter_16i32' @@ -1564,7 +1618,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SKL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_16i32' @@ -1586,15 +1640,15 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { ; SSE2-LABEL: 'test_scatter_8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_8i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_8i32' @@ -1607,15 +1661,15 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SSE2-LABEL: 'test_scatter_4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; KNL-LABEL: 'test_scatter_4i32' @@ -1634,25 +1688,25 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) ; SSE2-LABEL: 'test_gather_4f32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32' diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index a6fb78f..02a279e 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -1266,85 +1266,139 @@ define i32 @masked_expandload() { define i32 @masked_compressstore() { ; SSE2-LABEL: 'masked_compressstore' -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SSE42-LABEL: 'masked_compressstore' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; -; AVX-LABEL: 'masked_compressstore' -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; AVX1-LABEL: 'masked_compressstore' +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_compressstore' +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_compressstore' +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_compressstore' ; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) @@ -1626,19 +1680,19 @@ define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) { ; SSE2-LABEL: 'test_gather_2f64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; SSE42-LABEL: 'test_gather_2f64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; AVX1-LABEL: 'test_gather_2f64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; AVX2-LABEL: 'test_gather_2f64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res ; ; SKL-LABEL: 'test_gather_2f64' @@ -1655,19 +1709,19 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32' @@ -1767,25 +1821,25 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, < ; SSE2-LABEL: 'test_gather_16f32_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_var_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1811,25 +1865,25 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3 ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1916,7 +1970,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_16i32' @@ -1924,7 +1978,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_scatter_16i32' @@ -1932,7 +1986,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_scatter_16i32' @@ -1940,7 +1994,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKL-LABEL: 'test_scatter_16i32' @@ -1948,7 +2002,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SKL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_16i32' @@ -1970,15 +2024,15 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { ; SSE2-LABEL: 'test_scatter_8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_8i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_8i32' @@ -1991,15 +2045,15 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SSE2-LABEL: 'test_scatter_4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_scatter_4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; KNL-LABEL: 'test_scatter_4i32' @@ -2018,25 +2072,25 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) ; SSE2-LABEL: 'test_gather_4f32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll index afb73f4..da38a10 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll @@ -4,8 +4,8 @@ ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 -; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 ; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQVEC512 @@ -17,70 +17,81 @@ define void @replication_i1_stride2() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride2' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> -; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> +; SSE2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride2' -; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> -; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> -; SSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> -; SSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> -; SSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> -; SSE3-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> -; SSE3-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> +; SSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> +; SSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> +; SSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride2' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride2' -; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> -; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> -; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> -; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> -; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> +; SSE41-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> +; SSE41-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> +; SSE41-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride2' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> -; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> +; SSE42-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride2' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> -; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> -; AVX-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> -; AVX-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> -; AVX-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> +; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> +; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> +; AVX1-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31> +; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63> +; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7, i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15, i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23, i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31, i32 32, i32 32, i32 33, i32 33, i32 34, i32 34, i32 35, i32 35, i32 36, i32 36, i32 37, i32 37, i32 38, i32 38, i32 39, i32 39, i32 40, i32 40, i32 41, i32 41, i32 42, i32 42, i32 43, i32 43, i32 44, i32 44, i32 45, i32 45, i32 46, i32 46, i32 47, i32 47, i32 48, i32 48, i32 49, i32 49, i32 50, i32 50, i32 51, i32 51, i32 52, i32 52, i32 53, i32 53, i32 54, i32 54, i32 55, i32 55, i32 56, i32 56, i32 57, i32 57, i32 58, i32 58, i32 59, i32 59, i32 60, i32 60, i32 61, i32 61, i32 62, i32 62, i32 63, i32 63, i32 64, i32 64, i32 65, i32 65, i32 66, i32 66, i32 67, i32 67, i32 68, i32 68, i32 69, i32 69, i32 70, i32 70, i32 71, i32 71, i32 72, i32 72, i32 73, i32 73, i32 74, i32 74, i32 75, i32 75, i32 76, i32 76, i32 77, i32 77, i32 78, i32 78, i32 79, i32 79, i32 80, i32 80, i32 81, i32 81, i32 82, i32 82, i32 83, i32 83, i32 84, i32 84, i32 85, i32 85, i32 86, i32 86, i32 87, i32 87, i32 88, i32 88, i32 89, i32 89, i32 90, i32 90, i32 91, i32 91, i32 92, i32 92, i32 93, i32 93, i32 94, i32 94, i32 95, i32 95, i32 96, i32 96, i32 97, i32 97, i32 98, i32 98, i32 99, i32 99, i32 100, i32 100, i32 101, i32 101, i32 102, i32 102, i32 103, i32 103, i32 104, i32 104, i32 105, i32 105, i32 106, i32 106, i32 107, i32 107, i32 108, i32 108, i32 109, i32 109, i32 110, i32 110, i32 111, i32 111, i32 112, i32 112, i32 113, i32 113, i32 114, i32 114, i32 115, i32 115, i32 116, i32 116, i32 117, i32 117, i32 118, i32 118, i32 119, i32 119, i32 120, i32 120, i32 121, i32 121, i32 122, i32 122, i32 123, i32 123, i32 124, i32 124, i32 125, i32 125, i32 126, i32 126, i32 127, i32 127> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride2' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer @@ -183,70 +194,81 @@ define void @replication_i1_stride2() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride3() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> -; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> -; SSE2-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> -; SSE2-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> -; SSE2-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> +; SSE2-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> +; SSE2-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> +; SSE2-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride3' -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> -; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> -; SSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> -; SSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> -; SSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> -; SSE3-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> -; SSE3-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> +; SSE3-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> +; SSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> +; SSE3-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride3' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride3' -; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> -; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> -; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> -; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> -; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> -; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> -; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> +; SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> +; SSE41-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> +; SSE41-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> +; SSE41-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> -; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> -; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> +; SSE42-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> +; SSE42-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> +; SSE42-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride3' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> -; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> -; AVX-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> -; AVX-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> -; AVX-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride3' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> +; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> +; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> +; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride3' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15> +; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31> +; AVX2-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63> +; AVX2-NEXT: Cost Model: Found an estimated cost of 412 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride3' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer @@ -349,70 +371,81 @@ define void @replication_i1_stride3() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride4() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride4' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> -; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> -; SSE2-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> -; SSE2-NEXT: Cost Model: Found an estimated cost of 560 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1120 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> +; SSE2-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> +; SSE2-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride4' -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> -; SSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> -; SSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> -; SSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> -; SSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> -; SSE3-NEXT: Cost Model: Found an estimated cost of 560 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1120 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> +; SSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> +; SSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride4' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 560 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1120 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride4' -; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> -; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> -; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> -; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> -; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> -; SSE41-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> -; SSE41-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> +; SSE41-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> +; SSE41-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> +; SSE41-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride4' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> -; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> -; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> -; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> -; SSE42-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> +; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> +; SSE42-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride4' -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> -; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> -; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> -; AVX-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> -; AVX-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> +; AVX1-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> +; AVX1-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> +; AVX1-NEXT: Cost Model: Found an estimated cost of 552 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15> +; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31> +; AVX2-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63> +; AVX2-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride4' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer @@ -515,70 +548,81 @@ define void @replication_i1_stride4() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride5() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride5' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE2-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE2-NEXT: Cost Model: Found an estimated cost of 864 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1728 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride5' -; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE3-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE3-NEXT: Cost Model: Found an estimated cost of 864 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1728 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride5' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 432 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 864 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1728 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride5' -; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE41-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE41-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE41-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE41-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE41-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE41-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE41-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride5' -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE42-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE42-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE42-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE42-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE42-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride5' -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> -; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> -; AVX-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> -; AVX-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> -; AVX-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride5' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> +; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> +; AVX1-NEXT: Cost Model: Found an estimated cost of 344 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> +; AVX1-NEXT: Cost Model: Found an estimated cost of 688 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride5' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7> +; AVX2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15> +; AVX2-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31> +; AVX2-NEXT: Cost Model: Found an estimated cost of 342 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63> +; AVX2-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride5' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer @@ -681,70 +725,81 @@ define void @replication_i1_stride5() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride6() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride6' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE2-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE2-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE2-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1856 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE2-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE2-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE2-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride6' -; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE3-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE3-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE3-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1856 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride6' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1856 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride6' -; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE41-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE41-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE41-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE41-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE41-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE41-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride6' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE42-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE42-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE42-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE42-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE42-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE42-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride6' -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; AVX-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; AVX-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; AVX-NEXT: Cost Model: Found an estimated cost of 944 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride6' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; AVX1-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; AVX1-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; AVX1-NEXT: Cost Model: Found an estimated cost of 412 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; AVX1-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride6' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; AVX2-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride6' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer @@ -847,70 +902,81 @@ define void @replication_i1_stride6() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride7() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride7' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE2-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE2-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE2-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1984 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE2-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE2-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE2-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride7' -; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE3-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE3-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1984 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE3-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE3-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride7' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1984 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride7' -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE41-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE41-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE41-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE41-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE41-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE41-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE41-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride7' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE42-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE42-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE42-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE42-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride7' -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; AVX-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; AVX-NEXT: Cost Model: Found an estimated cost of 540 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; AVX-NEXT: Cost Model: Found an estimated cost of 1080 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride7' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; AVX1-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride7' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; AVX2-NEXT: Cost Model: Found an estimated cost of 239 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; AVX2-NEXT: Cost Model: Found an estimated cost of 478 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; AVX2-NEXT: Cost Model: Found an estimated cost of 956 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride7' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer @@ -1013,70 +1079,81 @@ define void @replication_i1_stride7() nounwind "min-legal-vector-width"="256" { define void @replication_i1_stride8() nounwind "min-legal-vector-width"="256" { ; SSE2-LABEL: 'replication_i1_stride8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE2-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE2-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE2-NEXT: Cost Model: Found an estimated cost of 2112 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE2-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE2-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE2-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'replication_i1_stride8' -; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE3-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE3-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE3-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE3-NEXT: Cost Model: Found an estimated cost of 2112 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE3-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'replication_i1_stride8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2112 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'replication_i1_stride8' -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE41-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE41-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE41-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE41-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE41-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'replication_i1_stride8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; SSE42-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; SSE42-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; SSE42-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; SSE42-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'replication_i1_stride8' -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -; AVX-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; AVX-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> -; AVX-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> -; AVX-NEXT: Cost Model: Found an estimated cost of 1216 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'replication_i1_stride8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; AVX1-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; AVX1-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; AVX1-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1096 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; AVX2-NEXT: Cost Model: Found an estimated cost of 273 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> +; AVX2-NEXT: Cost Model: Found an estimated cost of 546 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1092 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 21, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 26, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 30, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 34, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 35, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 38, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 39, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 41, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 43, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 44, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 45, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 46, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 47, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 49, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 50, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 51, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 52, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 53, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 57, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 58, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 59, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 60, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 61, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 62, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 63, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 65, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 66, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 67, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 68, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 69, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 70, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 71, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 72, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 73, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 74, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 75, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 76, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 77, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 78, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 79, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 81, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 82, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 83, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 84, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 85, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 86, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 87, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 88, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 89, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 90, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 91, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 92, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 93, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 94, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 97, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 98, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 99, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 101, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 102, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 103, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 104, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 105, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 106, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 107, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 108, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 109, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 110, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 111, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 113, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 114, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 115, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 116, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 117, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 118, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 119, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 120, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 121, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 122, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 123, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 124, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 125, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 126, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride8' ; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll index f947106..bb868b5 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/LoopnestFixedSize.ll @@ -6,8 +6,8 @@ target triple = "powerpc64le-unknown-linux-gnu" ; Check delinearization in loop cache analysis can handle fixed-size arrays. ; The IR is copied from llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll -; CHECK-DAG: Loop 'for.body' has cost = 4186116 -; CHECK-DAG: Loop 'for.body4' has cost = 128898 +; CHECK: Loop 'for.body' has cost = 4186116 +; CHECK: Loop 'for.body4' has cost = 128898 ;; #define N 1024 ;; #define M 2048 @@ -48,8 +48,8 @@ for.end13: ; preds = %for.inc11 } -; CHECK-DAG: Loop 'for.body' has cost = 4186116 -; CHECK-DAG: Loop 'for.body4' has cost = 128898 +; CHECK: Loop 'for.body' has cost = 4186116 +; CHECK: Loop 'for.body4' has cost = 128898 define void @t2([2048 x i32]* %a) { entry: @@ -83,14 +83,13 @@ for.end13: ; preds = %for.inc11 declare [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %arg) -; CHECK-DAG: Loop 'for.body' has cost = 4472886244958208 -; CHECK-DAG: Loop 'for.body4' has cost = 4472886244958208 -; CHECK-DAG: Loop 'for.body8' has cost = 4472886244958208 -; CHECK-DAG: Loop 'for.body12' has cost = 4472886244958208 -; CHECK-DAG: Loop 'for.body16' has cost = 137728168833024 +; CHECK: Loop 'for.body' has cost = 2112128815104000000 +; CHECK: Loop 'for.body4' has cost = 16762927104000000 +; CHECK: Loop 'for.body8' has cost = 130960368000000 +; CHECK: Loop 'for.body12' has cost = 1047682944000 +; CHECK: Loop 'for.body16' has cost = 32260032000 - -;; #define N 1024 +;; #define N 128 ;; #define M 2048 ;; void t3(int a[][N][N][N][M]) { ;; for (int i1 = 0; i1 < N-1; ++i1) @@ -101,7 +100,7 @@ declare [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %arg) ;; a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2]; ;; } -define void @t3([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) { +define void @t3([128 x [128 x [128 x [2048 x i32]]]]* %a) { entry: br label %for.body @@ -127,9 +126,9 @@ for.body16: ; preds = %for.body12, %for.bo %1 = add nsw i64 %indvars.iv14, -2 %2 = add nsw i64 %indvars.iv7, -3 %3 = add nuw nsw i64 %indvars.iv, 2 - %arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3 + %arrayidx26 = getelementptr inbounds [128 x [128 x [128 x [2048 x i32]]]], [128 x [128 x [128 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3 %4 = load i32, i32* %arrayidx26, align 4 - %arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv + %arrayidx36 = getelementptr inbounds [128 x [128 x [128 x [2048 x i32]]]], [128 x [128 x [128 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv store i32 %4, i32* %arrayidx36, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp ne i64 %indvars.iv.next, 2046 @@ -137,22 +136,22 @@ for.body16: ; preds = %for.body12, %for.bo for.inc37: ; preds = %for.body16 %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1 - %exitcond10 = icmp ne i64 %indvars.iv.next8, 1024 + %exitcond10 = icmp ne i64 %indvars.iv.next8, 128 br i1 %exitcond10, label %for.body12, label %for.inc40 for.inc40: ; preds = %for.inc37 %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1 - %exitcond13 = icmp ne i64 %indvars.iv.next12, 1024 + %exitcond13 = icmp ne i64 %indvars.iv.next12, 128 br i1 %exitcond13, label %for.body8, label %for.inc43 for.inc43: ; preds = %for.inc40 %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1 - %exitcond17 = icmp ne i64 %indvars.iv.next15, 1024 + %exitcond17 = icmp ne i64 %indvars.iv.next15, 128 br i1 %exitcond17, label %for.body4, label %for.inc46 for.inc46: ; preds = %for.inc43 %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1 - %exitcond21 = icmp ne i64 %indvars.iv.next19, 1023 + %exitcond21 = icmp ne i64 %indvars.iv.next19, 127 br i1 %exitcond21, label %for.body, label %for.end48 for.end48: ; preds = %for.inc46 diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll index b73abca..c5c9e76 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/loads-store.ll @@ -10,9 +10,9 @@ target triple = "powerpc64le-unknown-linux-gnu" ; A[i][k][j] += B[i][k][j] + C[i][j][k]; ; } -; CHECK-DAG: Loop 'for.i' has cost = 3000000 -; CHECK-DAG: Loop 'for.k' has cost = 2030000 -; CHECK-DAG: Loop 'for.j' has cost = 1060000 +; CHECK: Loop 'for.i' has cost = 3000000 +; CHECK: Loop 'for.k' has cost = 2030000 +; CHECK: Loop 'for.j' has cost = 1060000 define void @foo(i64 %n, i64 %m, i64 %o, i32* %A, i32* %B, i32* %C) { entry: diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll index c7342fe..6679b19 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matmul.ll @@ -10,9 +10,9 @@ target triple = "powerpc64le-unknown-linux-gnu" ; C[i][j] = C[i][j] + A[i][k] * B[k][j]; ; } -; CHECK-DAG:Loop 'for.i' has cost = 2010000 -; CHECK-DAG:Loop 'for.k' has cost = 1040000 -; CHECK-DAG:Loop 'for.j' has cost = 70000 +; CHECK:Loop 'for.i' has cost = 2010000 +; CHECK:Loop 'for.k' has cost = 1040000 +; CHECK:Loop 'for.j' has cost = 70000 define void @matmul(i64 %n, i64 %m, i64 %o, i32* %A, i32* %B, i32* %C) { entry: diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll index 3137952..2b317c0 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll @@ -14,11 +14,11 @@ target triple = "powerpc64le-unknown-linux-gnu" ; y[k+1][j][i][l] = y[k+1][j][i][l] + b[k][j][i][m][l]*x[k][j][i][m] ; } -; CHECK-DAG: Loop 'k_loop' has cost = 30000000000 -; CHECK-DAG: Loop 'j_loop' has cost = 30000000000 -; CHECK-DAG: Loop 'i_loop' has cost = 30000000000 -; CHECK-DAG: Loop 'm_loop' has cost = 10700000000 -; CHECK-DAG: Loop 'l_loop' has cost = 1300000000 +; CHECK: Loop 'k_loop' has cost = 10200000000000000 +; CHECK: Loop 'j_loop' has cost = 102000000000000 +; CHECK: Loop 'i_loop' has cost = 1020000000000 +; CHECK: Loop 'm_loop' has cost = 10700000000 +; CHECK: Loop 'l_loop' has cost = 1300000000 %_elem_type_of_double = type <{ double }> diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll new file mode 100644 index 0000000..89d959e --- /dev/null +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/multi-store.ll @@ -0,0 +1,102 @@ +; RUN: opt < %s -opaque-pointers -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" +target triple = "powerpc64le-unknown-linux-gnu" + +; CHECK-DAG: Loop 'for.j' has cost = 201000000 +; CHECK-DAG: Loop 'for.i' has cost = 102000000 +; CHECK-DAG: Loop 'for.k' has cost = 90000 + +;; Test to make sure when we have multiple conflicting access patterns, the +;; chosen loop configuration favours the majority of those accesses. +;; For example this nest should be ordered as j-i-k. +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < n; j++) +;; for (int k = 0; k < n; k++) { +;; A[i][j][k] = 1; +;; B[j][i][k] = 2; +;; C[j][i][k] = 3; +;; } + +define void @foo(i32 noundef signext %n, ptr noalias noundef %A, ptr noalias noundef %B, ptr noalias noundef %C) { +entry: + %0 = zext i32 %n to i64 + %1 = zext i32 %n to i64 + %2 = zext i32 %n to i64 + %3 = zext i32 %n to i64 + %4 = zext i32 %n to i64 + %5 = zext i32 %n to i64 + %cmp5 = icmp sgt i32 %n, 0 + br i1 %cmp5, label %for.i.preheader, label %for.end30 + +for.i.preheader: ; preds = %entry + %wide.trip.count16 = zext i32 %n to i64 + br label %for.i + +for.i: ; preds = %for.i.preheader, %for.inc28 + %indvars.iv13 = phi i64 [ 0, %for.i.preheader ], [ %indvars.iv.next14, %for.inc28 ] + %cmp23 = icmp sgt i32 %n, 0 + br i1 %cmp23, label %for.j.preheader, label %for.inc28 + +for.j.preheader: ; preds = %for.i + %wide.trip.count11 = zext i32 %n to i64 + br label %for.j + +for.j: ; preds = %for.j.preheader, %for.inc25 + %indvars.iv8 = phi i64 [ 0, %for.j.preheader ], [ %indvars.iv.next9, %for.inc25 ] + %cmp61 = icmp sgt i32 %n, 0 + br i1 %cmp61, label %for.k.preheader, label %for.inc25 + +for.k.preheader: ; preds = %for.j + %wide.trip.count = zext i32 %n to i64 + br label %for.k + +for.k: ; preds = %for.k.preheader, %for.k + %indvars.iv = phi i64 [ 0, %for.k.preheader ], [ %indvars.iv.next, %for.k ] + %6 = mul nuw i64 %0, %1 + %7 = mul nsw i64 %6, %indvars.iv13 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %7 + %8 = mul nuw nsw i64 %indvars.iv8, %1 + %arrayidx10 = getelementptr inbounds i32, ptr %arrayidx, i64 %8 + %arrayidx12 = getelementptr inbounds i32, ptr %arrayidx10, i64 %indvars.iv + store i32 1, ptr %arrayidx12, align 4 + %9 = mul nuw i64 %2, %3 + %10 = mul nsw i64 %9, %indvars.iv8 + %arrayidx14 = getelementptr inbounds i32, ptr %B, i64 %10 + %11 = mul nuw nsw i64 %indvars.iv13, %3 + %arrayidx16 = getelementptr inbounds i32, ptr %arrayidx14, i64 %11 + %arrayidx18 = getelementptr inbounds i32, ptr %arrayidx16, i64 %indvars.iv + store i32 2, ptr %arrayidx18, align 4 + %12 = mul nuw i64 %4, %5 + %13 = mul nsw i64 %12, %indvars.iv8 + %arrayidx20 = getelementptr inbounds i32, ptr %C, i64 %13 + %14 = mul nuw nsw i64 %indvars.iv13, %5 + %arrayidx22 = getelementptr inbounds i32, ptr %arrayidx20, i64 %14 + %arrayidx24 = getelementptr inbounds i32, ptr %arrayidx22, i64 %indvars.iv + store i32 3, ptr %arrayidx24, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.k, label %for.inc25.loopexit + +for.inc25.loopexit: ; preds = %for.k + br label %for.inc25 + +for.inc25: ; preds = %for.inc25.loopexit, %for.j + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + %exitcond12 = icmp ne i64 %indvars.iv.next9, %wide.trip.count11 + br i1 %exitcond12, label %for.j, label %for.inc28.loopexit + +for.inc28.loopexit: ; preds = %for.inc25 + br label %for.inc28 + +for.inc28: ; preds = %for.inc28.loopexit, %for.i + %indvars.iv.next14 = add nuw nsw i64 %indvars.iv13, 1 + %exitcond17 = icmp ne i64 %indvars.iv.next14, %wide.trip.count16 + br i1 %exitcond17, label %for.i, label %for.end30.loopexit + +for.end30.loopexit: ; preds = %for.inc28 + br label %for.end30 + +for.end30: ; preds = %for.end30.loopexit, %entry + ret void +} diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll index cce6731..8aa247b 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/single-store.ll @@ -10,9 +10,9 @@ target triple = "powerpc64le-unknown-linux-gnu" ; A[2*i+3][3*j-4][2*k+7] = 1; ; } -; CHECK-DAG: Loop 'for.i' has cost = 1000000 -; CHECK-DAG: Loop 'for.j' has cost = 1000000 -; CHECK-DAG: Loop 'for.k' has cost = 60000 +; CHECK: Loop 'for.i' has cost = 100000000 +; CHECK: Loop 'for.j' has cost = 1000000 +; CHECK: Loop 'for.k' has cost = 60000 define void @foo(i64 %n, i64 %m, i64 %o, i32* %A) { entry: diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll index 1f15154..f2549f4 100644 --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/stencil.ll @@ -11,8 +11,8 @@ target triple = "powerpc64le-unknown-linux-gnu" ; } ; } -; CHECK-DAG: Loop 'for.i' has cost = 20600 -; CHECK-DAG: Loop 'for.j' has cost = 800 +; CHECK: Loop 'for.i' has cost = 20600 +; CHECK: Loop 'for.j' has cost = 800 define void @foo(i64 %n, i64 %m, i32* %A, i32* %B, i32* %C) { entry: diff --git a/llvm/test/CodeGen/AArch64/cpus.ll b/llvm/test/CodeGen/AArch64/cpus.ll index 88330b6..9167f12 100644 --- a/llvm/test/CodeGen/AArch64/cpus.ll +++ b/llvm/test/CodeGen/AArch64/cpus.ll @@ -34,6 +34,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=tsv110 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=apple-latest 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=a64fx 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=ampere1 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID ; CHECK-NOT: {{.*}} is not a recognized processor for this target diff --git a/llvm/test/CodeGen/AArch64/float-conv-elim.ll b/llvm/test/CodeGen/AArch64/float-conv-elim.ll new file mode 100644 index 0000000..5442125 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/float-conv-elim.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s + +define i32 @s32_f32_s24_s32(i32 %a) { +; CHECK-LABEL: s32_f32_s24_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: sbfx w0, w0, #0, #24 +; CHECK-NEXT: ret + %f = sitofp i32 %a to float + %i = fptosi float %f to i24 + %r = sext i24 %i to i32 + ret i32 %r +} + +define i32 @s32_f32_u24_u32(i32 %a) { +; CHECK-LABEL: s32_f32_u24_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: and w0, w0, #0xffffff +; CHECK-NEXT: ret + %f = sitofp i32 %a to float + %i = fptoui float %f to i24 + %r = zext i24 %i to i32 + ret i32 %r +} + +define i32 @u32_f32_s24_s32(i32 %a) { +; CHECK-LABEL: u32_f32_s24_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: sbfx w0, w0, #0, #24 +; CHECK-NEXT: ret + %f = uitofp i32 %a to float + %i = fptosi float %f to i24 + %r = sext i24 %i to i32 + ret i32 %r +} + +define i32 @u32_f32_u24_u32(i32 %a) { +; CHECK-LABEL: u32_f32_u24_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: and w0, w0, #0xffffff +; CHECK-NEXT: ret + %f = uitofp i32 %a to float + %i = fptoui float %f to i24 + %r = zext i24 %i to i32 + ret i32 %r +} + +; This requires converting to FP and back. + +define i32 @s32_f32_s25_s32(i32 %a) { +; CHECK-LABEL: s32_f32_s25_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, w0 +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret + %f = sitofp i32 %a to float + %i = fptosi float %f to i25 + %r = sext i25 %i to i32 + ret i32 %r +} + +define i32 @s32_f32_u25_u32(i32 %a) { +; CHECK-LABEL: s32_f32_u25_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, w0 +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret + %f = sitofp i32 %a to float + %i = fptoui float %f to i25 + %r = zext i25 %i to i32 + ret i32 %r +} + +; TODO: This could avoid converting to FP. + +define i32 @u32_f32_s25_s32(i32 %a) { +; CHECK-LABEL: u32_f32_s25_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, w0 +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret + %f = uitofp i32 %a to float + %i = fptosi float %f to i25 + %r = sext i25 %i to i32 + ret i32 %r +} + +define i32 @u32_f32_u25_u32(i32 %a) { +; CHECK-LABEL: u32_f32_u25_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, w0 +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret + %f = uitofp i32 %a to float + %i = fptoui float %f to i25 + %r = zext i25 %i to i32 + ret i32 %r +} diff --git a/llvm/test/CodeGen/AArch64/neon-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-dot-product.ll index 6c76d4e9..f71546c 100644 --- a/llvm/test/CodeGen/AArch64/neon-dot-product.ll +++ b/llvm/test/CodeGen/AArch64/neon-dot-product.ll @@ -5,6 +5,7 @@ ; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=neoverse-e1 < %s | FileCheck %s ; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=neoverse-n1 < %s | FileCheck %s ; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=neoverse-n2 < %s | FileCheck %s +; RUN: llc -mtriple aarch64-none-linux-gnu -mcpu=ampere1 < %s | FileCheck %s declare <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) declare <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/AArch64/remat.ll b/llvm/test/CodeGen/AArch64/remat.ll index bd2786f..062aa47 100644 --- a/llvm/test/CodeGen/AArch64/remat.ll +++ b/llvm/test/CodeGen/AArch64/remat.ll @@ -24,6 +24,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=tsv110 -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx3t110 -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=ampere1 -o - %s | FileCheck %s %X = type { i64, i64, i64 } declare void @f(%X*) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll new file mode 100644 index 0000000..da11e6b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; Ensure we don't crash by trying to fold fixed length frame indexes into +; loads/stores that don't support an appropriate addressing mode, hence creating +; too many extra vregs during frame lowering, when we don't have an emergency +; spill slot. + +define dso_local void @func1(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8, + i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* %v13, i64* %v14, i64* %v15, i64* %v16, + i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24, + i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* %v31, i64* %v32, + i64* %v33, i64* %v34, i64* %v35, i64* %v36, i64* %v37, i64* %v38, i64* %v39, i64* %v40, + i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* %v46, i64* %v47, i64* %v48, + i64 %v49) #0 { +; CHECK-LABEL: func1 + tail call void @func2(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8, + i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* undef, i64* %v14, i64* %v15, i64* %v16, + i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24, + i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* undef, i64* undef, + i64* undef, i64* undef, i64* undef, i64* undef, i64* %v37, i64* %v38, i64* %v39, i64* %v40, + i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* undef, i64* %v47, i64* %v48, + i64 undef) + ret void +} + +declare dso_local void @func2(i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*, + i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*, + i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*, + i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*, + i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*, + i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*, + i64) + +attributes #0 = { "target-features"="+sve" vscale_range(2,2) } diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll new file mode 100644 index 0000000..9227c4c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -debug-only=isel < %s 2>&1 | FileCheck %s + +; REQUIRES: asserts + +target triple = "aarch64-unknown-linux-gnu" + +; Ensure that only no offset frame indexes are folded into SVE load/stores when +; accessing fixed width objects. +define void @foo(<8 x i64>* %a) #0 { +; CHECK-LABEL: foo: +; CHECK: SelectionDAG has 14 nodes: +; CHECK-NEXT: t0: ch = EntryToken +; CHECK-NEXT: t12: nxv2i1 = PTRUE_D TargetConstant:i32<31> +; CHECK-NEXT: t2: i64,ch = CopyFromReg t0, Register:i64 %0 +; CHECK-NEXT: t18: nxv2i64,ch = LD1D_IMM<Mem:(volatile load (s512) from %ir.a)> t12, t2, TargetConstant:i64<0>, t0 +; CHECK-NEXT: t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0> +; CHECK-NEXT: t17: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r0)> t18, t12, TargetFrameIndex:i64<0>, TargetConstant:i64<0>, t0 +; CHECK-NEXT: t16: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r1)> t18, t12, t8, TargetConstant:i64<0>, t17 +; CHECK-NEXT: t10: ch = RET_ReallyLR t16 +; CHECK-EMPTY: +entry: + %r0 = alloca <8 x i64> + %r1 = alloca <8 x i64> + %r = load volatile <8 x i64>, <8 x i64>* %a + store volatile <8 x i64> %r, <8 x i64>* %r0 + store volatile <8 x i64> %r, <8 x i64>* %r1 + ret void +} + +attributes #0 = { nounwind "target-features"="+sve" vscale_range(4,4) } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir index 2a270a41..d41e8f8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_mul_s32 @@ -25,6 +25,11 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32) + ; GFX10-LABEL: name: test_mul_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: $vgpr0 = COPY [[MUL]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_MUL %0, %1 @@ -64,6 +69,15 @@ body: | ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_mul_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_MUL %0, %1 @@ -115,6 +129,19 @@ body: | ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-LABEL: name: test_mul_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_MUL %0, %1 @@ -202,6 +229,31 @@ body: | ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_mul_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_MUL %0, %1 @@ -237,6 +289,14 @@ body: | ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; GFX10-LABEL: name: test_mul_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC1]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -294,6 +354,11 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[COPY]], [[COPY1]] ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](<2 x s16>) + ; GFX10-LABEL: name: test_mul_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: $vgpr0 = COPY [[MUL]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_MUL %0, %1 @@ -359,6 +424,29 @@ body: | ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[MUL1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) + ; GFX10-LABEL: name: test_mul_v3s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY6]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[MUL]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[MUL1]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -462,6 +550,15 @@ body: | ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[MUL]](<2 x s16>), [[MUL1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-LABEL: name: test_mul_v4s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[UV1]], [[UV3]] + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[MUL]](<2 x s16>), [[MUL1]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_MUL %0, %1 @@ -489,6 +586,11 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32) + ; GFX10-LABEL: name: test_mul_s24 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX10-NEXT: $vgpr0 = COPY [[MUL]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s24) = G_TRUNC %0 @@ -542,6 +644,19 @@ body: | ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-LABEL: name: test_mul_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 @@ -635,6 +750,32 @@ body: | ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX10-LABEL: name: test_mul_s96 + ; GFX10: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 %2:_(s96) = G_MUL %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir index 69f2583..6405bf3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_sdiv_s32 @@ -118,6 +118,42 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX10-LABEL: name: test_sdiv_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SDIV %0, %1 @@ -331,6 +367,73 @@ body: | ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_sdiv_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] + ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SDIV %0, %1 @@ -853,6 +956,176 @@ body: | ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX10-LABEL: name: test_sdiv_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SDIV %0, %1 @@ -1858,6 +2131,337 @@ body: | ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_sdiv_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV27]], [[UADDO39]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV29]], [[UADDO41]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO7]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE12]](s32) + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE14]](s32) + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO9]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] + ; GFX10-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO57]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO56]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE10]], [[UADDO56]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE16]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO56]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL24]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD25]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD25]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL24]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD25]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] + ; GFX10-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD29]], [[UADDO69]] + ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] + ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE18]] + ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE18]] + ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] + ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE18]] + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX10-NEXT: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE18]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD33]](s32) + ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] + ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] + ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD33]] + ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD35]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD35]] + ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE12]](s32), [[UV55]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV54]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE12]](s32), [[UV55]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV54]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO11]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[USUBE14]], [[C6]], [[USUBO13]] + ; GFX10-NEXT: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] + ; GFX10-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV57]], [[UADDO81]] + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE20]](s32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] + ; GFX10-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[UV59]], [[UADDO83]] + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE22]](s32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX10-NEXT: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) + ; GFX10-NEXT: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO15]] + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE18]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SDIV %0, %1 @@ -1984,6 +2588,44 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX10-LABEL: name: test_sdiv_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -2231,6 +2873,80 @@ body: | ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB3]](s32), [[SUB7]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_sdiv_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] + ; GFX10-NEXT: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX10-NEXT: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB3]](s32), [[SUB7]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SDIV %0, %1 @@ -2357,6 +3073,44 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX10-LABEL: name: test_sdiv_s7 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -2486,6 +3240,44 @@ body: | ; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32) + ; GFX10-LABEL: name: test_sdiv_s17 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -3017,6 +3809,178 @@ body: | ; GFX9-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX10-LABEL: name: test_sdiv_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV23]], [[UADDO39]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UV25]], [[UADDO41]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO7]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE8]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir index d8bfabd..ba71db5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_srem_s32 @@ -106,6 +106,38 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX10-LABEL: name: test_srem_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SREM %0, %1 @@ -298,6 +330,66 @@ body: | ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_srem_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SREM %0, %1 @@ -805,6 +897,171 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX10-LABEL: name: test_srem_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SREM %0, %1 @@ -1783,6 +2040,328 @@ body: | ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_srem_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD17]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE8]](s32) + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE10]](s32) + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C1]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C4]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX10-NEXT: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO53]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE12]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[MUL24]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD25]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE12]], [[ADD25]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[MUL24]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD25]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE12]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[ADD29]], [[UADDO65]] + ; GFX10-NEXT: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] + ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE14]] + ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE14]] + ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] + ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE14]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE14]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] + ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] + ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD33]] + ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD35]], [[USUBO13]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD35]] + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) + ; GFX10-NEXT: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] + ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]] + ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] + ; GFX10-NEXT: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX10-NEXT: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX10-NEXT: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX10-NEXT: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] + ; GFX10-NEXT: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SREM %0, %1 @@ -1903,6 +2482,42 @@ body: | ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-LABEL: name: test_srem_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C3]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -2129,6 +2744,73 @@ body: | ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB4]](s32), [[SUB9]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_srem_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; GFX10-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; GFX10-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX10-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX10-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX10-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX10-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX10-NEXT: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] + ; GFX10-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB4]](s32), [[SUB9]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SREM %0, %1 @@ -2243,6 +2925,40 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX10-LABEL: name: test_srem_s7 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -2360,6 +3076,40 @@ body: | ; GFX9-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9-NEXT: $vgpr0 = COPY [[SUB4]](s32) + ; GFX10-LABEL: name: test_srem_s17 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX10-NEXT: $vgpr0 = COPY [[SUB4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -2876,6 +3626,173 @@ body: | ; GFX9-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) + ; GFX10-LABEL: name: test_srem_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 + ; GFX10-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX10-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX10-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir index 46ea14df..d17c2bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_udiv_s32 @@ -88,6 +88,32 @@ body: | ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX10-LABEL: name: test_udiv_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UDIV %0, %1 @@ -244,6 +270,54 @@ body: | ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_udiv_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UDIV %0, %1 @@ -700,6 +774,154 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX10-LABEL: name: test_udiv_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UDIV %0, %1 @@ -1576,6 +1798,294 @@ body: | ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_udiv_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV19]], [[UADDO35]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV21]], [[UADDO37]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO7]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[FPTOUI3]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]] + ; GFX10-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO49]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDO48]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE8]], [[UADDO48]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO6]], [[UADDE8]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO6]], [[UADDO48]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[MUL24]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD25]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE8]], [[ADD25]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[MUL24]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD25]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE8]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]] + ; GFX10-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[ADD29]], [[UADDO61]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]] + ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE10]] + ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE10]] + ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]] + ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE10]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX10-NEXT: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE10]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD33]](s32) + ; GFX10-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]] + ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]] + ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD33]] + ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD35]], [[USUBO9]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD35]] + ; GFX10-NEXT: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE10]](s32), [[UV35]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO8]](s32), [[UV34]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE10]](s32), [[UV35]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[USUBO8]], [[UV34]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO9]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[USUBE12]], [[C5]], [[USUBO11]] + ; GFX10-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]] + ; GFX10-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD33]], [[UV37]], [[UADDO73]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE12]](s32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64) + ; GFX10-NEXT: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]] + ; GFX10-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[UV39]], [[UADDO75]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE14]](s32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UDIV %0, %1 @@ -1678,6 +2188,36 @@ body: | ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX10-LABEL: name: test_udiv_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -1869,6 +2409,62 @@ body: | ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT2]](s32), [[SELECT5]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_udiv_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]] + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT2]](s32), [[SELECT5]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UDIV %0, %1 @@ -1968,6 +2564,35 @@ body: | ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX10-LABEL: name: test_udiv_s7 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -2070,6 +2695,35 @@ body: | ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; GFX10-LABEL: name: test_udiv_s17 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -2538,6 +3192,157 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX10-LABEL: name: test_udiv_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD15]](s32) + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[ADD15]], [[UV15]], [[UADDO35]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE4]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[UV17]], [[UADDO37]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE6]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir index 47077c7..20bfb0b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir @@ -10,24 +10,24 @@ body: | ; GFX8-LABEL: name: test_umulo_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX8: $vgpr0 = COPY [[MUL]](s32) - ; GFX8: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) ; GFX9-LABEL: name: test_umulo_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX9: $vgpr0 = COPY [[MUL]](s32) - ; GFX9: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[MUL]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_UMULO %0, %1 @@ -44,46 +44,46 @@ body: | ; GFX8-LABEL: name: test_umulo_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX8: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX9-LABEL: name: test_umulo_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 x s1>) = G_UMULO %0, %1 @@ -100,86 +100,86 @@ body: | ; GFX8-LABEL: name: test_umulo_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] - ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX8: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) - ; GFX8: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX8: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) ; GFX9-LABEL: name: test_umulo_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] - ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX9: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) - ; GFX9: $vgpr0_vgpr1 = COPY [[MV1]](s64) - ; GFX9: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_UMULO %0, %1 @@ -196,170 +196,170 @@ body: | ; GFX8-LABEL: name: test_umulo_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] - ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] - ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] - ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] - ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] - ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] - ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] - ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] - ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] - ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] - ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] - ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] - ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) - ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] - ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] - ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] - ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] - ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] - ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX8: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] + ; GFX8-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] + ; GFX8-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] + ; GFX8-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] + ; GFX8-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] + ; GFX8-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] + ; GFX8-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] + ; GFX8-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) ; GFX9-LABEL: name: test_umulo_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] - ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] - ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] - ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] - ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] - ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] - ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] - ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] - ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] - ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] - ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] - ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] - ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] - ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] - ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) - ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] - ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] - ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] - ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] - ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] - ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] - ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX9: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]] + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH4]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD5]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV]](s64), [[C]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV14]] + ; GFX9-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UV15]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV14]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL6]], [[MUL7]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UV15]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV14]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UV15]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL8]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH7]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD6]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UV15]] + ; GFX9-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UMULH8]], [[ADD8]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD9]](s32) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]] + ; GFX9-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]] + ; GFX9-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]] + ; GFX9-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]] + ; GFX9-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[MUL10]], [[MUL11]] + ; GFX9-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[UMULH9]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL9]](s32), [[ADD11]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>), %3:_(<2 x s1>) = G_UMULO %0, %1 @@ -376,38 +376,38 @@ body: | ; GFX8-LABEL: name: test_umulo_s24 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) - ; GFX8: $vgpr0 = COPY [[AND3]](s32) - ; GFX8: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) ; GFX9-LABEL: name: test_umulo_s24 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) - ; GFX9: $vgpr0 = COPY [[AND3]](s32) - ; GFX9: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s24) = G_TRUNC %0 @@ -428,30 +428,30 @@ body: | ; GFX8-LABEL: name: test_umulo_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX8: $vgpr0 = COPY [[AND3]](s32) - ; GFX8: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) ; GFX9-LABEL: name: test_umulo_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX9: $vgpr0 = COPY [[AND3]](s32) - ; GFX9: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -471,30 +471,30 @@ body: | ; GFX8-LABEL: name: test_umulo_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX8: $vgpr0 = COPY [[AND3]](s32) - ; GFX8: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) ; GFX9-LABEL: name: test_umulo_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; GFX9: $vgpr0 = COPY [[AND3]](s32) - ; GFX9: $vgpr1 = COPY [[ZEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -513,70 +513,70 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-LABEL: name: test_umulo_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] - ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX8: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; GFX8: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[AND11]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) - ; GFX8: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[AND11]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umulo_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[MUL]](s32), [[MUL1]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] - ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) - ; GFX9: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[MUL]](s32), [[MUL1]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s16>) = G_TRUNC %0 @@ -596,64 +596,64 @@ body: | liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-LABEL: name: test_umulo_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX8: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX8: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX8: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX8: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: test_umulo_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) - ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: $vgpr1 = COPY [[ANYEXT1]](s32) - ; GFX9: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -684,84 +684,84 @@ body: | liveins: $vgpr0, $vgpr1 ; GFX8-LABEL: name: test_umulo_v4s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] - ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] - ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] - ; GFX8: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] - ; GFX8: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX8: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] - ; GFX8: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX8: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX8: $vgpr0 = COPY [[OR2]](s32) - ; GFX8: $vgpr1 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] + ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX8-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umulo_v4s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] - ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] - ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] - ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] - ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] - ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] - ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) - ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) - ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: $vgpr0 = COPY [[OR2]](s32) - ; GFX9: $vgpr1 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] + ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir index 934f5b5..cf63e5398c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_urem_s32 @@ -79,6 +79,29 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX10-LABEL: name: test_urem_s32 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UREM %0, %1 @@ -220,6 +243,49 @@ body: | ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_urem_v2s32 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UREM %0, %1 @@ -664,6 +730,150 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX10-LABEL: name: test_urem_s64 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UREM %0, %1 @@ -1519,6 +1729,287 @@ body: | ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_urem_v2s64 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD17]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32) + ; GFX10-NEXT: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32) + ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C]] + ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C1]] + ; GFX10-NEXT: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C2]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX10-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C3]] + ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX10-NEXT: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX10-NEXT: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX10-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; GFX10-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]] + ; GFX10-NEXT: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]] + ; GFX10-NEXT: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]] + ; GFX10-NEXT: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]] + ; GFX10-NEXT: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]] + ; GFX10-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX10-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[ADD18]], [[UMULH15]] + ; GFX10-NEXT: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX10-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX10-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX10-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]] + ; GFX10-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX10-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX10-NEXT: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX10-NEXT: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD19]] + ; GFX10-NEXT: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX10-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1) + ; GFX10-NEXT: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]] + ; GFX10-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1) + ; GFX10-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX10-NEXT: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD20]] + ; GFX10-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX10-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ADD21]], [[ZEXT19]] + ; GFX10-NEXT: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD19]] + ; GFX10-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD22]] + ; GFX10-NEXT: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]] + ; GFX10-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD23]], [[UADDO45]] + ; GFX10-NEXT: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]] + ; GFX10-NEXT: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]] + ; GFX10-NEXT: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE4]] + ; GFX10-NEXT: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]] + ; GFX10-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX10-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[ADD24]], [[UMULH20]] + ; GFX10-NEXT: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL24]] + ; GFX10-NEXT: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD25]] + ; GFX10-NEXT: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]] + ; GFX10-NEXT: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX10-NEXT: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX10-NEXT: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]] + ; GFX10-NEXT: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX10-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX10-NEXT: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD25]] + ; GFX10-NEXT: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL24]] + ; GFX10-NEXT: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD25]] + ; GFX10-NEXT: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX10-NEXT: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX10-NEXT: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]] + ; GFX10-NEXT: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX10-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX10-NEXT: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD26]] + ; GFX10-NEXT: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX10-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ADD27]], [[ZEXT24]] + ; GFX10-NEXT: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD25]] + ; GFX10-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD28]] + ; GFX10-NEXT: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]] + ; GFX10-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD29]], [[UADDO57]] + ; GFX10-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX10-NEXT: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]] + ; GFX10-NEXT: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]] + ; GFX10-NEXT: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX10-NEXT: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX10-NEXT: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]] + ; GFX10-NEXT: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX10-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX10-NEXT: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE6]] + ; GFX10-NEXT: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]] + ; GFX10-NEXT: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE6]] + ; GFX10-NEXT: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX10-NEXT: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX10-NEXT: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]] + ; GFX10-NEXT: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX10-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX10-NEXT: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD30]] + ; GFX10-NEXT: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX10-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ADD31]], [[ZEXT29]] + ; GFX10-NEXT: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE6]] + ; GFX10-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD32]] + ; GFX10-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]] + ; GFX10-NEXT: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]] + ; GFX10-NEXT: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD33]] + ; GFX10-NEXT: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]] + ; GFX10-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX10-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[ADD34]], [[UMULH29]] + ; GFX10-NEXT: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]] + ; GFX10-NEXT: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD35]], [[USUBO11]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD35]] + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32) + ; GFX10-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX10-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]] + ; GFX10-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX10-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]] + ; GFX10-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX10-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]] + ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX10-NEXT: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]] + ; GFX10-NEXT: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]] + ; GFX10-NEXT: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]] + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32) + ; GFX10-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]] + ; GFX10-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX10-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]] + ; GFX10-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX10-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]] + ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX10-NEXT: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]] + ; GFX10-NEXT: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]] + ; GFX10-NEXT: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]] + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32) + ; GFX10-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]] + ; GFX10-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]] + ; GFX10-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UREM %0, %1 @@ -1612,6 +2103,33 @@ body: | ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX10-LABEL: name: test_urem_s16 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -1788,6 +2306,57 @@ body: | ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT1]](s32), [[SELECT3]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_urem_v2s16 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UREM %0, %1 @@ -1878,6 +2447,32 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX10-LABEL: name: test_urem_s7 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -1971,6 +2566,32 @@ body: | ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX10-LABEL: name: test_urem_s17 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX10-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX10-NEXT: $vgpr0 = COPY [[SELECT1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -2427,6 +3048,153 @@ body: | ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; GFX10-LABEL: name: test_urem_s33 + ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX10-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX10-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]] + ; GFX10-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]] + ; GFX10-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX10-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX10-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX10-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]] + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX10-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX10-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX10-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX10-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]] + ; GFX10-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX10-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]] + ; GFX10-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX10-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX10-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]] + ; GFX10-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]] + ; GFX10-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]] + ; GFX10-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]] + ; GFX10-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]] + ; GFX10-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX10-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]] + ; GFX10-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]] + ; GFX10-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX10-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX10-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]] + ; GFX10-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX10-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX10-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]] + ; GFX10-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD7]] + ; GFX10-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX10-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX10-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]] + ; GFX10-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX10-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX10-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD8]] + ; GFX10-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX10-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]] + ; GFX10-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD7]] + ; GFX10-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]] + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]] + ; GFX10-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UADDE]], [[ADD11]], [[UADDO23]] + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX10-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]] + ; GFX10-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX10-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX10-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]] + ; GFX10-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1) + ; GFX10-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX10-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]] + ; GFX10-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE2]] + ; GFX10-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX10-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX10-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]] + ; GFX10-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX10-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX10-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD12]] + ; GFX10-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX10-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]] + ; GFX10-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE2]] + ; GFX10-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]] + ; GFX10-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]] + ; GFX10-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD15]] + ; GFX10-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]] + ; GFX10-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX10-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]] + ; GFX10-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]] + ; GFX10-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD17]], [[USUBO3]] + ; GFX10-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD17]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX10-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX10-NEXT: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]] + ; GFX10-NEXT: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]] + ; GFX10-NEXT: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX10-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]] + ; GFX10-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX10-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]] + ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX10-NEXT: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]] + ; GFX10-NEXT: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]] + ; GFX10-NEXT: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX10-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]] + ; GFX10-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index a4eb3a4..129a2c2 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -1748,46 +1748,35 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa define amdgpu_kernel void @v_insertelement_v8f16_3(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in, i32 %val) { ; GFX9-LABEL: v_insertelement_v8f16_3: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: s_load_dword s6, s[4:5], 0x10 -; GFX9-NEXT: s_add_u32 s0, s0, s7 -; GFX9-NEXT: s_addc_u32 s1, s1, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 4, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v4, s[10:11] -; GFX9-NEXT: v_mov_b32_e32 v5, s6 -; GFX9-NEXT: buffer_store_short v5, off, s[0:3], 0 offset:16 -; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:16 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshl_or_b32 v1, v5, 16, v1 -; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] +; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX9-NEXT: v_lshl_or_b32 v1, s6, 16, v1 +; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_insertelement_v8f16_3: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; VI-NEXT: s_load_dword s4, s[4:5], 0x10 ; VI-NEXT: v_lshlrev_b32_e32 v4, 4, v0 -; VI-NEXT: s_add_u32 s0, s0, s7 -; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s11 -; VI-NEXT: v_add_u32_e32 v0, vcc, s10, v4 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v5, s4 ; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; VI-NEXT: buffer_store_short v5, off, s[0:3], 0 offset:16 -; VI-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:16 -; VI-NEXT: v_mov_b32_e32 v5, s9 -; VI-NEXT: v_add_u32_e32 v4, vcc, s8, v4 -; VI-NEXT: s_mov_b32 s4, 0xffff +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: s_lshl_b32 s1, s4, 16 +; VI-NEXT: s_mov_b32 s2, 0xffff +; VI-NEXT: v_add_u32_e32 v4, vcc, s0, v4 +; VI-NEXT: v_mov_b32_e32 v6, s1 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_bfi_b32 v3, s4, v3, v3 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; VI-NEXT: v_bfi_b32 v3, s2, v3, v3 ; VI-NEXT: v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm @@ -1826,45 +1815,35 @@ define amdgpu_kernel void @v_insertelement_v8f16_3(<8 x half> addrspace(1)* %out define amdgpu_kernel void @v_insertelement_v8i16_6(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in, i32 %val) { ; GFX9-LABEL: v_insertelement_v8i16_6: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: s_load_dword s6, s[4:5], 0x10 -; GFX9-NEXT: s_add_u32 s0, s0, s7 -; GFX9-NEXT: s_addc_u32 s1, s1, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v4, s[10:11] -; GFX9-NEXT: v_mov_b32_e32 v5, s6 -; GFX9-NEXT: buffer_store_short v5, off, s[0:3], 0 offset:16 -; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:16 -; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff +; GFX9-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_bfi_b32 v3, v6, v5, v3 -; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] +; GFX9-NEXT: v_bfi_b32 v3, v5, s6, v3 +; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: v_insertelement_v8i16_6: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; VI-NEXT: s_load_dword s4, s[4:5], 0x10 ; VI-NEXT: v_lshlrev_b32_e32 v4, 4, v0 -; VI-NEXT: s_add_u32 s0, s0, s7 -; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s11 -; VI-NEXT: v_add_u32_e32 v0, vcc, s10, v4 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_mov_b32_e32 v5, s4 ; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; VI-NEXT: buffer_store_short v5, off, s[0:3], 0 offset:16 -; VI-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:16 -; VI-NEXT: s_mov_b32 s4, 0xffff -; VI-NEXT: v_mov_b32_e32 v5, s9 -; VI-NEXT: v_add_u32_e32 v4, vcc, s8, v4 +; VI-NEXT: s_mov_b32 s2, 0xffff +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: v_mov_b32_e32 v6, s4 +; VI-NEXT: v_add_u32_e32 v4, vcc, s0, v4 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_bfi_b32 v1, s4, v1, v1 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_bfi_b32 v3, s4, v6, v3 +; VI-NEXT: v_bfi_b32 v3, s2, v6, v3 +; VI-NEXT: v_bfi_b32 v1, s2, v1, v1 ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll new file mode 100644 index 0000000..d4ca078 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s +; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX906 %s +; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefixes=GFX908 %s +; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A %s + +define amdgpu_kernel void @scalar_to_vector_v8i16(<2 x i32> %in, <8 x i16>* %out) #0 { +; GFX900-LABEL: scalar_to_vector_v8i16: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX900-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX900-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-NEXT: s_pack_lh_b32_b16 s4, s0, s0 +; GFX900-NEXT: v_mov_b32_e32 v6, s3 +; GFX900-NEXT: v_add_co_u32_e32 v5, vcc, s2, v0 +; GFX900-NEXT: v_mov_b32_e32 v2, s1 +; GFX900-NEXT: v_mov_b32_e32 v4, s0 +; GFX900-NEXT: v_mov_b32_e32 v1, s4 +; GFX900-NEXT: v_mov_b32_e32 v3, s4 +; GFX900-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX900-NEXT: flat_store_dwordx4 v[5:6], v[1:4] +; GFX900-NEXT: s_endpgm +; +; GFX906-LABEL: scalar_to_vector_v8i16: +; GFX906: ; %bb.0: ; %entry +; GFX906-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX906-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX906-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX906-NEXT: s_waitcnt lgkmcnt(0) +; GFX906-NEXT: s_pack_lh_b32_b16 s4, s0, s0 +; GFX906-NEXT: v_mov_b32_e32 v6, s3 +; GFX906-NEXT: v_add_co_u32_e32 v5, vcc, s2, v0 +; GFX906-NEXT: v_mov_b32_e32 v2, s1 +; GFX906-NEXT: v_mov_b32_e32 v4, s0 +; GFX906-NEXT: v_mov_b32_e32 v1, s4 +; GFX906-NEXT: v_mov_b32_e32 v3, s4 +; GFX906-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX906-NEXT: flat_store_dwordx4 v[5:6], v[1:4] +; GFX906-NEXT: s_endpgm +; +; GFX908-LABEL: scalar_to_vector_v8i16: +; GFX908: ; %bb.0: ; %entry +; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX908-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX908-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: s_pack_lh_b32_b16 s4, s0, s0 +; GFX908-NEXT: v_mov_b32_e32 v6, s3 +; GFX908-NEXT: v_add_co_u32_e32 v5, vcc, s2, v0 +; GFX908-NEXT: v_mov_b32_e32 v2, s1 +; GFX908-NEXT: v_mov_b32_e32 v4, s0 +; GFX908-NEXT: v_mov_b32_e32 v1, s4 +; GFX908-NEXT: v_mov_b32_e32 v3, s4 +; GFX908-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX908-NEXT: flat_store_dwordx4 v[5:6], v[1:4] +; GFX908-NEXT: s_endpgm +; +; GFX90A-LABEL: scalar_to_vector_v8i16: +; GFX90A: ; %bb.0: ; %entry +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_pack_lh_b32_b16 s4, s0, s0 +; GFX90A-NEXT: v_mov_b32_e32 v1, s3 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s2, v0 +; GFX90A-NEXT: v_mov_b32_e32 v3, s1 +; GFX90A-NEXT: v_mov_b32_e32 v5, s0 +; GFX90A-NEXT: v_mov_b32_e32 v2, s4 +; GFX90A-NEXT: v_mov_b32_e32 v4, s4 +; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX90A-NEXT: flat_store_dwordx4 v[0:1], v[2:5] +; GFX90A-NEXT: s_endpgm +entry: + %val.1.i32 = extractelement <2 x i32> %in, i64 0 + %val.2.vec2.i16 = bitcast i32 %val.1.i32 to <2 x i16> + %val.3.vec8.i16 = shufflevector <2 x i16> %val.2.vec2.i16, <2 x i16> %val.2.vec2.i16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + + %val.4.vec4.i32 = shufflevector <2 x i32> %in, <2 x i32> %in, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %val.5.vec8.i16 = bitcast <4 x i32> %val.4.vec4.i32 to <8 x i16> + + %val.6.vec8.i16 = shufflevector <8 x i16> %val.5.vec8.i16, <8 x i16> %val.3.vec8.i16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9> + + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 + %tid.ext = sext i32 %tid to i64 + %out.gep = getelementptr inbounds <8 x i16>, <8 x i16>* %out, i64 %tid.ext + store <8 x i16> %val.6.vec8.i16, <8 x i16>* %out.gep, align 16 + + ret void +} + +define amdgpu_kernel void @scalar_to_vector_v8f16(<2 x float> %in, <8 x half>* %out) #0 { +; GFX900-LABEL: scalar_to_vector_v8f16: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX900-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX900-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-NEXT: s_lshr_b32 s4, s0, 16 +; GFX900-NEXT: v_mov_b32_e32 v3, s0 +; GFX900-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX900-NEXT: v_mov_b32_e32 v6, s3 +; GFX900-NEXT: v_add_co_u32_e32 v5, vcc, s2, v0 +; GFX900-NEXT: v_mov_b32_e32 v2, s1 +; GFX900-NEXT: v_mov_b32_e32 v1, s0 +; GFX900-NEXT: v_mov_b32_e32 v4, s0 +; GFX900-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX900-NEXT: flat_store_dwordx4 v[5:6], v[1:4] +; GFX900-NEXT: s_endpgm +; +; GFX906-LABEL: scalar_to_vector_v8f16: +; GFX906: ; %bb.0: ; %entry +; GFX906-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX906-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX906-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX906-NEXT: s_waitcnt lgkmcnt(0) +; GFX906-NEXT: s_lshr_b32 s4, s0, 16 +; GFX906-NEXT: v_mov_b32_e32 v3, s0 +; GFX906-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX906-NEXT: v_mov_b32_e32 v6, s3 +; GFX906-NEXT: v_add_co_u32_e32 v5, vcc, s2, v0 +; GFX906-NEXT: v_mov_b32_e32 v2, s1 +; GFX906-NEXT: v_mov_b32_e32 v1, s0 +; GFX906-NEXT: v_mov_b32_e32 v4, s0 +; GFX906-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX906-NEXT: flat_store_dwordx4 v[5:6], v[1:4] +; GFX906-NEXT: s_endpgm +; +; GFX908-LABEL: scalar_to_vector_v8f16: +; GFX908: ; %bb.0: ; %entry +; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX908-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX908-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: s_lshr_b32 s4, s0, 16 +; GFX908-NEXT: v_mov_b32_e32 v3, s0 +; GFX908-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX908-NEXT: v_mov_b32_e32 v6, s3 +; GFX908-NEXT: v_add_co_u32_e32 v5, vcc, s2, v0 +; GFX908-NEXT: v_mov_b32_e32 v2, s1 +; GFX908-NEXT: v_mov_b32_e32 v1, s0 +; GFX908-NEXT: v_mov_b32_e32 v4, s0 +; GFX908-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX908-NEXT: flat_store_dwordx4 v[5:6], v[1:4] +; GFX908-NEXT: s_endpgm +; +; GFX90A-LABEL: scalar_to_vector_v8f16: +; GFX90A: ; %bb.0: ; %entry +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_lshr_b32 s4, s0, 16 +; GFX90A-NEXT: v_mov_b32_e32 v4, s0 +; GFX90A-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX90A-NEXT: v_mov_b32_e32 v1, s3 +; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s2, v0 +; GFX90A-NEXT: v_mov_b32_e32 v3, s1 +; GFX90A-NEXT: v_mov_b32_e32 v2, s0 +; GFX90A-NEXT: v_mov_b32_e32 v5, s0 +; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX90A-NEXT: flat_store_dwordx4 v[0:1], v[2:5] +; GFX90A-NEXT: s_endpgm +entry: + %val.1.float = extractelement <2 x float> %in, i64 0 + %val.2.vec2.half = bitcast float %val.1.float to <2 x half> + %val.3.vec8.half = shufflevector <2 x half> %val.2.vec2.half, <2 x half> %val.2.vec2.half, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + + %val.4.vec4.float = shufflevector <2 x float> %in, <2 x float> %in, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %val.5.vec8.half = bitcast <4 x float> %val.4.vec4.float to <8 x half> + + %val.6.vec8.half = shufflevector <8 x half> %val.5.vec8.half, <8 x half> %val.3.vec8.half, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9> + + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 + %tid.ext = sext i32 %tid to i64 + %out.gep = getelementptr inbounds <8 x half>, <8 x half>* %out, i64 %tid.ext + store <8 x half> %val.6.vec8.half, <8 x half>* %out.gep, align 16 + + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/ARM/segmented-stacks.ll b/llvm/test/CodeGen/ARM/segmented-stacks.ll index 10ce065..3861712 100644 --- a/llvm/test/CodeGen/ARM/segmented-stacks.ll +++ b/llvm/test/CodeGen/ARM/segmented-stacks.ll @@ -280,4 +280,24 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 { } + +declare void @panic() unnamed_addr + +; We used to crash while compiling the following function. +; ARM-linux-LABEL: build_should_not_segfault: +; ARM-android-LABEL: build_should_not_segfault: +define void @build_should_not_segfault(i8 %x) unnamed_addr #0 { +start: + %_0 = icmp ult i8 %x, 16 + %or.cond = select i1 undef, i1 true, i1 %_0 + br i1 %or.cond, label %bb1, label %bb2 + +bb1: + ret void + +bb2: + call void @panic() + unreachable +} + attributes #0 = { "split-stack" } diff --git a/llvm/test/CodeGen/PowerPC/addegluecrash.ll b/llvm/test/CodeGen/PowerPC/addegluecrash.ll index 2338ca9..404360e 100644 --- a/llvm/test/CodeGen/PowerPC/addegluecrash.ll +++ b/llvm/test/CodeGen/PowerPC/addegluecrash.ll @@ -23,12 +23,11 @@ define void @bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* n ; CHECK-NEXT: addc 6, 6, 7 ; CHECK-NEXT: addze 5, 5 ; CHECK-NEXT: add 3, 5, 3 -; CHECK-NEXT: cmpld 7, 3, 5 -; CHECK-NEXT: mfocrf 3, 1 -; CHECK-NEXT: rlwinm 5, 3, 29, 31, 31 -; CHECK-NEXT: # implicit-def: $x3 -; CHECK-NEXT: mr 3, 5 -; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: cmpld 3, 5 +; CHECK-NEXT: crmove 20, 0 +; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: isel 3, 3, 5, 20 ; CHECK-NEXT: std 3, 0(4) ; CHECK-NEXT: blr %1 = load i64, i64* %a, align 8 diff --git a/llvm/test/CodeGen/PowerPC/atomics-constant.ll b/llvm/test/CodeGen/PowerPC/atomics-constant.ll index 80f84d8..3a7da6f 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-constant.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-constant.ll @@ -3,16 +3,14 @@ target triple = "powerpc64le-unknown-linux-gnu" -@a = dso_local constant i64 zeroinitializer +@a = dso_local global i64 zeroinitializer define i64 @foo() { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: addis 3, 2, a@toc@ha ; CHECK-NEXT: ld 3, a@toc@l(3) -; CHECK-NEXT: cmpd 7, 4, 4 -; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: cmpd 7, 3, 3 ; CHECK-NEXT: bne- 7, .+4 ; CHECK-NEXT: isync ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll b/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll index bac39c1..4156455 100644 --- a/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll +++ b/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll @@ -14,10 +14,6 @@ define i32 @test_choice1(fp128 %a, fp128 %b) #0 { ; P8-NEXT: nop ; P8-NEXT: # kill: def $r3 killed $r3 killed $x3 ; P8-NEXT: cmplwi 3, 0 -; P8-NEXT: li 3, 0 -; P8-NEXT: li 4, 1 -; P8-NEXT: iseleq 3, 3, 4 -; P8-NEXT: cmplwi 3, 0 ; P8-NEXT: bne 0, .LBB0_2 ; P8-NEXT: b .LBB0_1 ; P8-NEXT: .LBB0_1: # %if.true @@ -88,11 +84,7 @@ define i32 @test_choice2(fp128 %a, fp128 %b) #0 { ; P8-NEXT: nop ; P8-NEXT: # kill: def $r3 killed $r3 killed $x3 ; P8-NEXT: cmpwi 3, 1 -; P8-NEXT: li 4, 0 -; P8-NEXT: li 3, 1 -; P8-NEXT: isellt 3, 3, 4 -; P8-NEXT: cmplwi 3, 0 -; P8-NEXT: bne 0, .LBB1_2 +; P8-NEXT: blt 0, .LBB1_2 ; P8-NEXT: b .LBB1_1 ; P8-NEXT: .LBB1_1: # %if.true ; P8-NEXT: bl foo @@ -115,37 +107,30 @@ define i32 @test_choice2(fp128 %a, fp128 %b) #0 { ; P9: # %bb.0: # %entry ; P9-NEXT: mflr 0 ; P9-NEXT: std 0, 16(1) -; P9-NEXT: stdu 1, -128(1) -; P9-NEXT: xscmpuqp 7, 2, 3 -; P9-NEXT: mfocrf 3, 1 -; P9-NEXT: rotlwi 3, 3, 28 -; P9-NEXT: stw 3, 124(1) -; P9-NEXT: mfocrf 3, 1 -; P9-NEXT: lwz 4, 124(1) -; P9-NEXT: rotlwi 4, 4, 4 -; P9-NEXT: mtocrf 1, 4 -; P9-NEXT: clrlwi 3, 3, 31 -; P9-NEXT: xori 4, 3, 1 -; P9-NEXT: mfocrf 3, 1 -; P9-NEXT: rlwinm 3, 3, 30, 31, 31 -; P9-NEXT: xori 3, 3, 1 -; P9-NEXT: and 3, 3, 4 -; P9-NEXT: cmplwi 3, 0 -; P9-NEXT: bne 0, .LBB1_2 +; P9-NEXT: stdu 1, -112(1) +; P9-NEXT: xscmpuqp 0, 2, 3 +; P9-NEXT: crmove 20, 3 +; P9-NEXT: crnot 21, 20 +; P9-NEXT: crmove 20, 1 +; P9-NEXT: crnot 20, 20 +; P9-NEXT: crand 20, 20, 21 +; P9-NEXT: crxor 21, 21, 21 +; P9-NEXT: crxor 20, 20, 21 +; P9-NEXT: bc 12, 20, .LBB1_2 ; P9-NEXT: b .LBB1_1 ; P9-NEXT: .LBB1_1: # %if.true ; P9-NEXT: bl foo ; P9-NEXT: nop -; P9-NEXT: stw 3, 120(1) # 4-byte Folded Spill +; P9-NEXT: stw 3, 108(1) # 4-byte Folded Spill ; P9-NEXT: b .LBB1_3 ; P9-NEXT: .LBB1_2: # %if.false ; P9-NEXT: bl bar ; P9-NEXT: nop -; P9-NEXT: stw 3, 120(1) # 4-byte Folded Spill +; P9-NEXT: stw 3, 108(1) # 4-byte Folded Spill ; P9-NEXT: .LBB1_3: # %final -; P9-NEXT: lwz 3, 120(1) # 4-byte Folded Reload +; P9-NEXT: lwz 3, 108(1) # 4-byte Folded Reload ; P9-NEXT: clrldi 3, 3, 32 -; P9-NEXT: addi 1, 1, 128 +; P9-NEXT: addi 1, 1, 112 ; P9-NEXT: ld 0, 16(1) ; P9-NEXT: mtlr 0 ; P9-NEXT: blr @@ -175,10 +160,8 @@ define i32 @test_choice3(fp128 %a, fp128 %b) #0 { ; P8-NEXT: bl __ltkf2 ; P8-NEXT: nop ; P8-NEXT: # kill: def $r3 killed $r3 killed $x3 -; P8-NEXT: not 3, 3 -; P8-NEXT: srwi 3, 3, 31 -; P8-NEXT: cmplwi 3, 0 -; P8-NEXT: bne 0, .LBB2_2 +; P8-NEXT: cmpwi 3, -1 +; P8-NEXT: bgt 0, .LBB2_2 ; P8-NEXT: b .LBB2_1 ; P8-NEXT: .LBB2_1: # %if.true ; P8-NEXT: bl foo diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll b/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll index 84d4614..045b67b 100644 --- a/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll +++ b/llvm/test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll @@ -2,7 +2,7 @@ define i1 @TestULT(double %t0) { ; CHECK-LABEL: TestULT: -; CHECK: xscmpudp +; CHECK: fcmpu ; CHECK: blr entry: %t1 = fcmp ult double %t0, 0.000000e+00 @@ -49,7 +49,7 @@ good: define i1 @TestUEQ(double %t0) { ; CHECK-LABEL: TestUEQ: -; CHECK: xscmpudp +; CHECK: fcmpu ; CHECK: blr entry: %t1 = fcmp ueq double %t0, 0.000000e+00 @@ -64,7 +64,7 @@ good: define i1 @TestUGT(double %t0) { ; CHECK-LABEL: TestUGT: -; CHECK: xscmpudp +; CHECK: fcmpu ; CHECK: blr entry: %t1 = fcmp ugt double %t0, 0.000000e+00 @@ -111,7 +111,7 @@ good: define i1 @TestOLE(double %t0) { ; CHECK-LABEL: TestOLE: -; CHECK: xscmpudp +; CHECK: fcmpu ; CHECK: blr entry: %t1 = fcmp ole double %t0, 0.000000e+00 @@ -126,7 +126,7 @@ good: define i1 @TestONE(double %t0) { ; CHECK-LABEL: TestONE: -; CHECK: xscmpudp +; CHECK: fcmpu ; CHECK: blr entry: %t1 = fcmp one double %t0, 0.000000e+00 @@ -173,7 +173,7 @@ good: define i1 @TestOGE(double %t0) { ; CHECK-LABEL: TestOGE: -; CHECK: xscmpudp +; CHECK: fcmpu ; CHECK: blr entry: %t1 = fcmp oge double %t0, 0.000000e+00 diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll index 6b6703f..d6e0dde 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll @@ -5,25 +5,19 @@ define i32 @une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 { ; CHECK-LABEL: une_ppcf128: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscmpudp cr7, f1, f3 -; CHECK-NEXT: mfocrf r3, 1 -; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 -; CHECK-NEXT: xscmpudp cr7, f2, f4 -; CHECK-NEXT: mfocrf r4, 1 -; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31 -; CHECK-NEXT: xori r4, r4, 1 -; CHECK-NEXT: and r4, r3, r4 -; CHECK-NEXT: xscmpudp cr7, f1, f3 -; CHECK-NEXT: mfocrf r3, 1 -; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 -; CHECK-NEXT: xori r3, r3, 1 -; CHECK-NEXT: xscmpudp cr7, f1, f3 -; CHECK-NEXT: mfocrf r5, 1 -; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31 -; CHECK-NEXT: xori r5, r5, 1 -; CHECK-NEXT: and r3, r3, r5 -; CHECK-NEXT: or r3, r3, r4 -; CHECK-NEXT: # kill: def $r4 killed $r3 +; CHECK-NEXT: fcmpu cr0, f1, f3 +; CHECK-NEXT: crmove 4*cr5+lt, eq +; CHECK-NEXT: fcmpu cr1, f2, f4 +; CHECK-NEXT: crmove 4*cr5+gt, 4*cr1+eq +; CHECK-NEXT: crnot 4*cr5+gt, 4*cr5+gt +; CHECK-NEXT: crand 4*cr5+gt, 4*cr5+lt, 4*cr5+gt +; CHECK-NEXT: crmove 4*cr5+lt, eq +; CHECK-NEXT: crnot 4*cr5+lt, 4*cr5+lt +; CHECK-NEXT: crand 4*cr5+lt, 4*cr5+lt, 4*cr5+lt +; CHECK-NEXT: cror 4*cr5+lt, 4*cr5+lt, 4*cr5+gt +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: isel r3, r3, r4, 4*cr5+lt ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: @@ -36,28 +30,19 @@ entry: define i32 @ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 { ; CHECK-LABEL: ogt_ppcf128: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscmpudp cr7, f1, f3 -; CHECK-NEXT: mfocrf r3, 1 -; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 -; CHECK-NEXT: xscmpudp cr7, f2, f4 -; CHECK-NEXT: mfocrf r4, 1 -; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31 -; CHECK-NEXT: and r4, r3, r4 -; CHECK-NEXT: xscmpudp cr0, f1, f3 -; CHECK-NEXT: mfocrf r3, 128 -; CHECK-NEXT: stw r3, -4(r1) -; CHECK-NEXT: xscmpudp cr7, f1, f3 -; CHECK-NEXT: mfocrf r3, 1 -; CHECK-NEXT: lwz r5, -4(r1) -; CHECK-NEXT: rotlwi r5, r5, 4 -; CHECK-NEXT: mtocrf 1, r5 -; CHECK-NEXT: rlwinm r5, r3, 30, 31, 31 -; CHECK-NEXT: mfocrf r3, 1 -; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 -; CHECK-NEXT: xori r3, r3, 1 -; CHECK-NEXT: and r3, r3, r5 -; CHECK-NEXT: or r3, r3, r4 -; CHECK-NEXT: # kill: def $r4 killed $r3 +; CHECK-NEXT: fcmpu cr0, f1, f3 +; CHECK-NEXT: crmove 4*cr5+lt, eq +; CHECK-NEXT: fcmpu cr1, f2, f4 +; CHECK-NEXT: crmove 4*cr5+gt, 4*cr1+gt +; CHECK-NEXT: crand 4*cr5+gt, 4*cr5+lt, 4*cr5+gt +; CHECK-NEXT: crmove 4*cr5+lt, eq +; CHECK-NEXT: crnot 4*cr5+lt, 4*cr5+lt +; CHECK-NEXT: crmove 4*cr5+eq, gt +; CHECK-NEXT: crand 4*cr5+lt, 4*cr5+lt, 4*cr5+eq +; CHECK-NEXT: cror 4*cr5+lt, 4*cr5+lt, 4*cr5+gt +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: isel r3, r3, r4, 4*cr5+lt ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: @@ -69,12 +54,14 @@ entry: define i1 @test_f128(fp128 %a, fp128 %b) #0 { ; CHECK-LABEL: test_f128: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscmpuqp cr7, v2, v3 -; CHECK-NEXT: mfocrf r3, 1 -; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 -; CHECK-NEXT: xori r4, r3, 1 -; CHECK-NEXT: # implicit-def: $x3 -; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: xscmpuqp cr0, v2, v3 +; CHECK-NEXT: crmove 4*cr5+lt, eq +; CHECK-NEXT: xscmpuqp cr0, v2, v3 +; CHECK-NEXT: crmove 4*cr5+gt, eq +; CHECK-NEXT: crnor 4*cr5+lt, 4*cr5+lt, 4*cr5+gt +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: isel r3, r3, r4, 4*cr5+lt ; CHECK-NEXT: blr entry: %0 = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0 @@ -84,11 +71,9 @@ entry: define i1 @testbr_f64(double %a, double %b) #0 { ; CHECK-LABEL: testbr_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xscmpudp cr7, f1, f2 -; CHECK-NEXT: mfocrf r3, 1 -; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 -; CHECK-NEXT: cmplwi r3, 0 -; CHECK-NEXT: bne cr0, .LBB3_2 +; CHECK-NEXT: fcmpu cr0, f1, f2 +; CHECK-NEXT: crmove 4*cr5+lt, eq +; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB3_2 ; CHECK-NEXT: b .LBB3_1 ; CHECK-NEXT: .LBB3_1: # %tr ; CHECK-NEXT: li r3, -1 @@ -108,11 +93,9 @@ fl: define i1 @testbr_f32(float %a, float %b) #0 { ; CHECK-LABEL: testbr_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fcmpu cr7, f1, f2 -; CHECK-NEXT: mfocrf r3, 1 -; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 -; CHECK-NEXT: cmplwi r3, 0 -; CHECK-NEXT: bne cr0, .LBB4_2 +; CHECK-NEXT: fcmpu cr0, f1, f2 +; CHECK-NEXT: crmove 4*cr5+lt, eq +; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB4_2 ; CHECK-NEXT: b .LBB4_1 ; CHECK-NEXT: .LBB4_1: # %tr ; CHECK-NEXT: li r3, -1 diff --git a/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll b/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll index 627db54..4d55737 100644 --- a/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll +++ b/llvm/test/CodeGen/PowerPC/fp64-to-int16.ll @@ -7,11 +7,11 @@ define i1 @Test(double %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpsxws 0, 1 ; CHECK-NEXT: mffprwz 3, 0 -; CHECK-NEXT: xori 3, 3, 65534 -; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: srwi 4, 3, 5 -; CHECK-NEXT: # implicit-def: $x3 -; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: cmplwi 3, 65534 +; CHECK-NEXT: crmove 20, 2 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: isel 3, 3, 4, 20 ; CHECK-NEXT: blr entry: %conv = fptoui double %a to i16 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-byte-loads.ll b/llvm/test/CodeGen/PowerPC/pcrel-byte-loads.ll index 998dc3c..3082c04 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-byte-loads.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-byte-loads.ll @@ -45,6 +45,7 @@ define void @i32_ZextLoad_i1() { ; CHECK-LE-LABEL: i32_ZextLoad_i1: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: plbz r3, GlobLd1@PCREL(0), 1 +; CHECK-LE-NEXT: clrldi r3, r3, 63 ; CHECK-LE-NEXT: pstb r3, GlobSt1@PCREL(0), 1 ; CHECK-LE-NEXT: blr ; @@ -53,6 +54,7 @@ define void @i32_ZextLoad_i1() { ; CHECK-BE-NEXT: addis r3, r2, GlobLd1@toc@ha ; CHECK-BE-NEXT: addis r4, r2, GlobSt1@toc@ha ; CHECK-BE-NEXT: lbz r3, GlobLd1@toc@l(r3) +; CHECK-BE-NEXT: clrldi r3, r3, 63 ; CHECK-BE-NEXT: stb r3, GlobSt1@toc@l(r4) ; CHECK-BE-NEXT: blr entry: @@ -77,11 +79,11 @@ define dso_local i1 @i32_ExtLoad_i1() local_unnamed_addr #0 { ; CHECK-LE-NEXT: paddi r3, 0, Glob1@PCREL, 1 ; CHECK-LE-NEXT: paddi r4, 0, Glob2@PCREL, 1 ; CHECK-LE-NEXT: bl Decl@notoc -; CHECK-LE-NEXT: plbz r4, GlobLd1@PCREL(0), 1 -; CHECK-LE-NEXT: cmplwi r3, 0 -; CHECK-LE-NEXT: li r3, 1 -; CHECK-LE-NEXT: iseleq r3, 0, r3 -; CHECK-LE-NEXT: and r3, r3, r4 +; CHECK-LE-NEXT: cmpwi cr1, r3, 0 +; CHECK-LE-NEXT: plbz r3, GlobLd1@PCREL(0), 1 +; CHECK-LE-NEXT: andi. r3, r3, 1 +; CHECK-LE-NEXT: crandc 4*cr5+lt, gt, 4*cr1+eq +; CHECK-LE-NEXT: setbc r3, 4*cr5+lt ; CHECK-LE-NEXT: addi r1, r1, 32 ; CHECK-LE-NEXT: ld r0, 16(r1) ; CHECK-LE-NEXT: mtlr r0 @@ -100,12 +102,12 @@ define dso_local i1 @i32_ExtLoad_i1() local_unnamed_addr #0 { ; CHECK-BE-NEXT: addi r4, r4, Glob2@toc@l ; CHECK-BE-NEXT: bl Decl ; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: addis r4, r2, GlobLd1@toc@ha -; CHECK-BE-NEXT: cmplwi r3, 0 -; CHECK-BE-NEXT: li r3, 1 -; CHECK-BE-NEXT: lbz r4, GlobLd1@toc@l(r4) -; CHECK-BE-NEXT: iseleq r3, 0, r3 -; CHECK-BE-NEXT: and r3, r3, r4 +; CHECK-BE-NEXT: cmpwi cr1, r3, 0 +; CHECK-BE-NEXT: addis r3, r2, GlobLd1@toc@ha +; CHECK-BE-NEXT: lbz r3, GlobLd1@toc@l(r3) +; CHECK-BE-NEXT: andi. r3, r3, 1 +; CHECK-BE-NEXT: crandc 4*cr5+lt, gt, 4*cr1+eq +; CHECK-BE-NEXT: setbc r3, 4*cr5+lt ; CHECK-BE-NEXT: addi r1, r1, 112 ; CHECK-BE-NEXT: ld r0, 16(r1) ; CHECK-BE-NEXT: mtlr r0 diff --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll index 066a04f..83fa3fba 100644 --- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll @@ -227,7 +227,7 @@ define i1 @test_urem_oversized(i66 %X) nounwind { ; PPC-NEXT: slwi 5, 5, 1 ; PPC-NEXT: add 6, 6, 9 ; PPC-NEXT: add 3, 5, 3 -; PPC-NEXT: rotlwi 8, 11, 31 +; PPC-NEXT: rotlwi 8, 11, 31 ; PPC-NEXT: sub 4, 6, 4 ; PPC-NEXT: lis 5, -5526 ; PPC-NEXT: rlwimi 8, 7, 31, 0, 0 @@ -235,7 +235,7 @@ define i1 @test_urem_oversized(i66 %X) nounwind { ; PPC-NEXT: add 3, 4, 3 ; PPC-NEXT: ori 5, 5, 61135 ; PPC-NEXT: rlwimi 7, 3, 31, 0, 0 -; PPC-NEXT: cmplw 8, 5 +; PPC-NEXT: cmplw 8, 5 ; PPC-NEXT: cmplwi 1, 7, 13 ; PPC-NEXT: rlwinm 3, 3, 31, 31, 31 ; PPC-NEXT: crand 20, 6, 0 diff --git a/llvm/test/CodeGen/RISCV/machine-cse.ll b/llvm/test/CodeGen/RISCV/machine-cse.ll index cd8ae1c..2e36f3c 100644 --- a/llvm/test/CodeGen/RISCV/machine-cse.ll +++ b/llvm/test/CodeGen/RISCV/machine-cse.ll @@ -504,3 +504,207 @@ trueblock: falseblock: ret void } + +define void @commute_fadd_f16(half %x, half %y, half* %p1, half* %p2, i1 zeroext %cond) { +; RV32-LABEL: commute_fadd_f16: +; RV32: # %bb.0: +; RV32-NEXT: fadd.h ft0, fa0, fa1 +; RV32-NEXT: fsh ft0, 0(a0) +; RV32-NEXT: beqz a2, .LBB14_2 +; RV32-NEXT: # %bb.1: # %trueblock +; RV32-NEXT: fsh ft0, 0(a0) +; RV32-NEXT: .LBB14_2: # %falseblock +; RV32-NEXT: ret +; +; RV64-LABEL: commute_fadd_f16: +; RV64: # %bb.0: +; RV64-NEXT: fadd.h ft0, fa0, fa1 +; RV64-NEXT: fsh ft0, 0(a0) +; RV64-NEXT: beqz a2, .LBB14_2 +; RV64-NEXT: # %bb.1: # %trueblock +; RV64-NEXT: fsh ft0, 0(a0) +; RV64-NEXT: .LBB14_2: # %falseblock +; RV64-NEXT: ret + %a = fadd half %x, %y + store half %a, half* %p1 + br i1 %cond, label %trueblock, label %falseblock + +trueblock: + %b = fadd half %y, %x + store half %b, half* %p1 + br label %falseblock + +falseblock: + ret void +} + +define void @commute_fadd_f32(float %x, float %y, float* %p1, float* %p2, i1 zeroext %cond) { +; RV32-LABEL: commute_fadd_f32: +; RV32: # %bb.0: +; RV32-NEXT: fadd.s ft0, fa0, fa1 +; RV32-NEXT: fsw ft0, 0(a0) +; RV32-NEXT: beqz a2, .LBB15_2 +; RV32-NEXT: # %bb.1: # %trueblock +; RV32-NEXT: fsw ft0, 0(a0) +; RV32-NEXT: .LBB15_2: # %falseblock +; RV32-NEXT: ret +; +; RV64-LABEL: commute_fadd_f32: +; RV64: # %bb.0: +; RV64-NEXT: fadd.s ft0, fa0, fa1 +; RV64-NEXT: fsw ft0, 0(a0) +; RV64-NEXT: beqz a2, .LBB15_2 +; RV64-NEXT: # %bb.1: # %trueblock +; RV64-NEXT: fsw ft0, 0(a0) +; RV64-NEXT: .LBB15_2: # %falseblock +; RV64-NEXT: ret + %a = fadd float %x, %y + store float %a, float* %p1 + br i1 %cond, label %trueblock, label %falseblock + +trueblock: + %b = fadd float %y, %x + store float %b, float* %p1 + br label %falseblock + +falseblock: + ret void +} + +define void @commute_fadd_f64(double %x, double %y, double* %p1, double* %p2, i1 zeroext %cond) { +; RV32-LABEL: commute_fadd_f64: +; RV32: # %bb.0: +; RV32-NEXT: fadd.d ft0, fa0, fa1 +; RV32-NEXT: fsd ft0, 0(a0) +; RV32-NEXT: beqz a2, .LBB16_2 +; RV32-NEXT: # %bb.1: # %trueblock +; RV32-NEXT: fsd ft0, 0(a0) +; RV32-NEXT: .LBB16_2: # %falseblock +; RV32-NEXT: ret +; +; RV64-LABEL: commute_fadd_f64: +; RV64: # %bb.0: +; RV64-NEXT: fadd.d ft0, fa0, fa1 +; RV64-NEXT: fsd ft0, 0(a0) +; RV64-NEXT: beqz a2, .LBB16_2 +; RV64-NEXT: # %bb.1: # %trueblock +; RV64-NEXT: fsd ft0, 0(a0) +; RV64-NEXT: .LBB16_2: # %falseblock +; RV64-NEXT: ret + %a = fadd double %x, %y + store double %a, double* %p1 + br i1 %cond, label %trueblock, label %falseblock + +trueblock: + %b = fadd double %y, %x + store double %b, double* %p1 + br label %falseblock + +falseblock: + ret void +} + +define void @commute_feq_f16(half %x, half %y, i8* %p1, i8* %p2, i1 zeroext %cond) { +; RV32-LABEL: commute_feq_f16: +; RV32: # %bb.0: +; RV32-NEXT: feq.h a1, fa0, fa1 +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: beqz a2, .LBB17_2 +; RV32-NEXT: # %bb.1: # %trueblock +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: .LBB17_2: # %falseblock +; RV32-NEXT: ret +; +; RV64-LABEL: commute_feq_f16: +; RV64: # %bb.0: +; RV64-NEXT: feq.h a1, fa0, fa1 +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: beqz a2, .LBB17_2 +; RV64-NEXT: # %bb.1: # %trueblock +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: .LBB17_2: # %falseblock +; RV64-NEXT: ret + %a = fcmp oeq half %x, %y + %b = zext i1 %a to i8 + store i8 %b, i8* %p1 + br i1 %cond, label %trueblock, label %falseblock + +trueblock: + %c = fcmp oeq half %y, %x + %d = zext i1 %c to i8 + store i8 %d, i8* %p1 + br label %falseblock + +falseblock: + ret void +} + +define void @commute_feq_f32(float %x, float %y, i8* %p1, i8* %p2, i1 zeroext %cond) { +; RV32-LABEL: commute_feq_f32: +; RV32: # %bb.0: +; RV32-NEXT: feq.s a1, fa0, fa1 +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: beqz a2, .LBB18_2 +; RV32-NEXT: # %bb.1: # %trueblock +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: .LBB18_2: # %falseblock +; RV32-NEXT: ret +; +; RV64-LABEL: commute_feq_f32: +; RV64: # %bb.0: +; RV64-NEXT: feq.s a1, fa0, fa1 +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: beqz a2, .LBB18_2 +; RV64-NEXT: # %bb.1: # %trueblock +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: .LBB18_2: # %falseblock +; RV64-NEXT: ret + %a = fcmp oeq float %x, %y + %b = zext i1 %a to i8 + store i8 %b, i8* %p1 + br i1 %cond, label %trueblock, label %falseblock + +trueblock: + %c = fcmp oeq float %y, %x + %d = zext i1 %c to i8 + store i8 %d, i8* %p1 + br label %falseblock + +falseblock: + ret void +} + +define void @commute_feq_f64(double %x, double %y, i8* %p1, i8* %p2, i1 zeroext %cond) { +; RV32-LABEL: commute_feq_f64: +; RV32: # %bb.0: +; RV32-NEXT: feq.d a1, fa0, fa1 +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: beqz a2, .LBB19_2 +; RV32-NEXT: # %bb.1: # %trueblock +; RV32-NEXT: sb a1, 0(a0) +; RV32-NEXT: .LBB19_2: # %falseblock +; RV32-NEXT: ret +; +; RV64-LABEL: commute_feq_f64: +; RV64: # %bb.0: +; RV64-NEXT: feq.d a1, fa0, fa1 +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: beqz a2, .LBB19_2 +; RV64-NEXT: # %bb.1: # %trueblock +; RV64-NEXT: sb a1, 0(a0) +; RV64-NEXT: .LBB19_2: # %falseblock +; RV64-NEXT: ret + %a = fcmp oeq double %x, %y + %b = zext i1 %a to i8 + store i8 %b, i8* %p1 + br i1 %cond, label %trueblock, label %falseblock + +trueblock: + %c = fcmp oeq double %y, %x + %d = zext i1 %c to i8 + store i8 %d, i8* %p1 + br label %falseblock + +falseblock: + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll index deac535..26c7509 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll @@ -1431,7 +1431,9 @@ define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i64_i64(<vscale x 1 x i64> ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; RV64-NEXT: vmslt.vx v0, v8, a0, v0.t +; RV64-NEXT: vmslt.vx v8, v8, a0, v0.t +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; RV64-NEXT: vmxor.mm v0, v8, v0 ; RV64-NEXT: ret entry: %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64( diff --git a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll index d08157b8..35671d8 100644 --- a/llvm/test/CodeGen/RISCV/split-offsets.ll +++ b/llvm/test/CodeGen/RISCV/split-offsets.ll @@ -118,3 +118,38 @@ while_end: ret void } +; GEPs have been manually split so the base GEP does not get used by any memory +; instructions. Make sure we use a small offset in each of the stores. +define void @test3([65536 x i32]* %t) { +; RV32I-LABEL: test3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a1, 20 +; RV32I-NEXT: addi a1, a1, -1920 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: li a1, 3 +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: test3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a1, 20 +; RV64I-NEXT: addiw a1, a1, -1920 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: sw a1, 4(a0) +; RV64I-NEXT: li a1, 3 +; RV64I-NEXT: sw a1, 8(a0) +; RV64I-NEXT: ret +entry: + %0 = bitcast [65536 x i32]* %t to i8* + %splitgep = getelementptr i8, i8* %0, i64 80000 + %1 = getelementptr i8, i8* %splitgep, i64 4 + %2 = bitcast i8* %1 to i32* + %3 = getelementptr i8, i8* %splitgep, i64 8 + %4 = bitcast i8* %3 to i32* + store i32 2, i32* %2, align 4 + store i32 3, i32* %4, align 4 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll index 3d95509..e86cedc 100644 --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -4040,6 +4040,236 @@ continue: ret i1 true } +define zeroext i1 @uaddo.i64.constant(i64 %v1, i64* %res) { +; RV32-LABEL: uaddo.i64.constant: +; RV32: # %bb.0: # %entry +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: addi a4, a0, 2 +; RV32-NEXT: sltu a0, a4, a0 +; RV32-NEXT: add a5, a1, a0 +; RV32-NEXT: bgeu a4, a3, .LBB66_2 +; RV32-NEXT: # %bb.1: # %entry +; RV32-NEXT: sltu a0, a5, a1 +; RV32-NEXT: .LBB66_2: # %entry +; RV32-NEXT: sw a4, 0(a2) +; RV32-NEXT: sw a5, 4(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: uaddo.i64.constant: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addi a2, a0, 2 +; RV64-NEXT: sltu a0, a2, a0 +; RV64-NEXT: sd a2, 0(a1) +; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.i64.constant: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mv a3, a0 +; RV32ZBA-NEXT: addi a4, a0, 2 +; RV32ZBA-NEXT: sltu a0, a4, a0 +; RV32ZBA-NEXT: add a5, a1, a0 +; RV32ZBA-NEXT: bgeu a4, a3, .LBB66_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a5, a1 +; RV32ZBA-NEXT: .LBB66_2: # %entry +; RV32ZBA-NEXT: sw a4, 0(a2) +; RV32ZBA-NEXT: sw a5, 4(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.i64.constant: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a2, a0, 2 +; RV64ZBA-NEXT: sltu a0, a2, a0 +; RV64ZBA-NEXT: sd a2, 0(a1) +; RV64ZBA-NEXT: ret +entry: + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64* %res + ret i1 %obit +} + +define zeroext i1 @uaddo.i64.constant_2048(i64 %v1, i64* %res) { +; RV32-LABEL: uaddo.i64.constant_2048: +; RV32: # %bb.0: # %entry +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: addi a0, a0, 1024 +; RV32-NEXT: addi a4, a0, 1024 +; RV32-NEXT: sltu a0, a4, a3 +; RV32-NEXT: add a5, a1, a0 +; RV32-NEXT: bgeu a4, a3, .LBB67_2 +; RV32-NEXT: # %bb.1: # %entry +; RV32-NEXT: sltu a0, a5, a1 +; RV32-NEXT: .LBB67_2: # %entry +; RV32-NEXT: sw a4, 0(a2) +; RV32-NEXT: sw a5, 4(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: uaddo.i64.constant_2048: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addi a2, a0, 1024 +; RV64-NEXT: addi a2, a2, 1024 +; RV64-NEXT: sltu a0, a2, a0 +; RV64-NEXT: sd a2, 0(a1) +; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.i64.constant_2048: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mv a3, a0 +; RV32ZBA-NEXT: addi a0, a0, 1024 +; RV32ZBA-NEXT: addi a4, a0, 1024 +; RV32ZBA-NEXT: sltu a0, a4, a3 +; RV32ZBA-NEXT: add a5, a1, a0 +; RV32ZBA-NEXT: bgeu a4, a3, .LBB67_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a5, a1 +; RV32ZBA-NEXT: .LBB67_2: # %entry +; RV32ZBA-NEXT: sw a4, 0(a2) +; RV32ZBA-NEXT: sw a5, 4(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.i64.constant_2048: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a2, a0, 1024 +; RV64ZBA-NEXT: addi a2, a2, 1024 +; RV64ZBA-NEXT: sltu a0, a2, a0 +; RV64ZBA-NEXT: sd a2, 0(a1) +; RV64ZBA-NEXT: ret +entry: + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2048) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64* %res + ret i1 %obit +} + +define zeroext i1 @uaddo.i64.constant_2049(i64 %v1, i64* %res) { +; RV32-LABEL: uaddo.i64.constant_2049: +; RV32: # %bb.0: # %entry +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: addi a0, a0, 1025 +; RV32-NEXT: addi a4, a0, 1024 +; RV32-NEXT: sltu a0, a4, a3 +; RV32-NEXT: add a5, a1, a0 +; RV32-NEXT: bgeu a4, a3, .LBB68_2 +; RV32-NEXT: # %bb.1: # %entry +; RV32-NEXT: sltu a0, a5, a1 +; RV32-NEXT: .LBB68_2: # %entry +; RV32-NEXT: sw a4, 0(a2) +; RV32-NEXT: sw a5, 4(a2) +; RV32-NEXT: ret +; +; RV64-LABEL: uaddo.i64.constant_2049: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addi a2, a0, 1025 +; RV64-NEXT: addi a2, a2, 1024 +; RV64-NEXT: sltu a0, a2, a0 +; RV64-NEXT: sd a2, 0(a1) +; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.i64.constant_2049: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: mv a3, a0 +; RV32ZBA-NEXT: addi a0, a0, 1025 +; RV32ZBA-NEXT: addi a4, a0, 1024 +; RV32ZBA-NEXT: sltu a0, a4, a3 +; RV32ZBA-NEXT: add a5, a1, a0 +; RV32ZBA-NEXT: bgeu a4, a3, .LBB68_2 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: sltu a0, a5, a1 +; RV32ZBA-NEXT: .LBB68_2: # %entry +; RV32ZBA-NEXT: sw a4, 0(a2) +; RV32ZBA-NEXT: sw a5, 4(a2) +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.i64.constant_2049: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a2, a0, 1025 +; RV64ZBA-NEXT: addi a2, a2, 1024 +; RV64ZBA-NEXT: sltu a0, a2, a0 +; RV64ZBA-NEXT: sd a2, 0(a1) +; RV64ZBA-NEXT: ret +entry: + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2049) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64* %res + ret i1 %obit +} + +define i64 @uaddo.i64.constant_setcc_on_overflow_flag(i64* %p) { +; RV32-LABEL: uaddo.i64.constant_setcc_on_overflow_flag: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lw a4, 0(a0) +; RV32-NEXT: lw a3, 4(a0) +; RV32-NEXT: addi a0, a4, 2 +; RV32-NEXT: sltu a2, a0, a4 +; RV32-NEXT: add a1, a3, a2 +; RV32-NEXT: bltu a0, a4, .LBB69_3 +; RV32-NEXT: # %bb.1: # %entry +; RV32-NEXT: beqz a2, .LBB69_4 +; RV32-NEXT: .LBB69_2: # %IfNoOverflow +; RV32-NEXT: ret +; RV32-NEXT: .LBB69_3: # %entry +; RV32-NEXT: sltu a2, a1, a3 +; RV32-NEXT: bnez a2, .LBB69_2 +; RV32-NEXT: .LBB69_4: # %IfOverflow +; RV32-NEXT: li a0, 0 +; RV32-NEXT: li a1, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: uaddo.i64.constant_setcc_on_overflow_flag: +; RV64: # %bb.0: # %entry +; RV64-NEXT: ld a1, 0(a0) +; RV64-NEXT: addi a0, a1, 2 +; RV64-NEXT: bltu a0, a1, .LBB69_2 +; RV64-NEXT: # %bb.1: # %IfOverflow +; RV64-NEXT: li a0, 0 +; RV64-NEXT: .LBB69_2: # %IfNoOverflow +; RV64-NEXT: ret +; +; RV32ZBA-LABEL: uaddo.i64.constant_setcc_on_overflow_flag: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: lw a4, 0(a0) +; RV32ZBA-NEXT: lw a3, 4(a0) +; RV32ZBA-NEXT: addi a0, a4, 2 +; RV32ZBA-NEXT: sltu a2, a0, a4 +; RV32ZBA-NEXT: add a1, a3, a2 +; RV32ZBA-NEXT: bltu a0, a4, .LBB69_3 +; RV32ZBA-NEXT: # %bb.1: # %entry +; RV32ZBA-NEXT: beqz a2, .LBB69_4 +; RV32ZBA-NEXT: .LBB69_2: # %IfNoOverflow +; RV32ZBA-NEXT: ret +; RV32ZBA-NEXT: .LBB69_3: # %entry +; RV32ZBA-NEXT: sltu a2, a1, a3 +; RV32ZBA-NEXT: bnez a2, .LBB69_2 +; RV32ZBA-NEXT: .LBB69_4: # %IfOverflow +; RV32ZBA-NEXT: li a0, 0 +; RV32ZBA-NEXT: li a1, 0 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: uaddo.i64.constant_setcc_on_overflow_flag: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: ld a1, 0(a0) +; RV64ZBA-NEXT: addi a0, a1, 2 +; RV64ZBA-NEXT: bltu a0, a1, .LBB69_2 +; RV64ZBA-NEXT: # %bb.1: # %IfOverflow +; RV64ZBA-NEXT: li a0, 0 +; RV64ZBA-NEXT: .LBB69_2: # %IfNoOverflow +; RV64ZBA-NEXT: ret +entry: + %v1 = load i64, i64* %p + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 2) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + br i1 %obit, label %IfNoOverflow, label %IfOverflow +IfOverflow: + ret i64 0 +IfNoOverflow: + ret i64 %val +} + declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone diff --git a/llvm/test/CodeGen/Thumb/segmented-stacks.ll b/llvm/test/CodeGen/Thumb/segmented-stacks.ll index 6ad8cf9..daebafe 100644 --- a/llvm/test/CodeGen/Thumb/segmented-stacks.ll +++ b/llvm/test/CodeGen/Thumb/segmented-stacks.ll @@ -275,4 +275,24 @@ define void @test_nostack() #0 { ; Thumb-linux-NOT: bl __morestack } + +declare void @panic() unnamed_addr + +; We used to crash while compiling the following function. +; Thumb-linux-LABEL: build_should_not_segfault: +; Thumb-android-LABEL: build_should_not_segfault: +define void @build_should_not_segfault(i8 %x) unnamed_addr #0 { +start: + %_0 = icmp ult i8 %x, 16 + %or.cond = select i1 undef, i1 true, i1 %_0 + br i1 %or.cond, label %bb1, label %bb2 + +bb1: + ret void + +bb2: + call void @panic() + unreachable +} + attributes #0 = { "split-stack" } diff --git a/llvm/test/CodeGen/Thumb2/segmented-stacks.ll b/llvm/test/CodeGen/Thumb2/segmented-stacks.ll index 0f34e9c..70892073 100644 --- a/llvm/test/CodeGen/Thumb2/segmented-stacks.ll +++ b/llvm/test/CodeGen/Thumb2/segmented-stacks.ll @@ -171,4 +171,24 @@ define fastcc void @test_fastcc_large() #0 { ; ARM-NEXT: .long 40192 } + +declare void @panic() unnamed_addr + +; We used to crash while compiling the following function. +; THUMB-LABEL: build_should_not_segfault: +; ARM-LABEL: build_should_not_segfault: +define void @build_should_not_segfault(i8 %x) unnamed_addr #0 { +start: + %_0 = icmp ult i8 %x, 16 + %or.cond = select i1 undef, i1 true, i1 %_0 + br i1 %or.cond, label %bb1, label %bb2 + +bb1: + ret void + +bb2: + call void @panic() + unreachable +} + attributes #0 = { "split-stack" } diff --git a/llvm/test/CodeGen/X86/add-and-not.ll b/llvm/test/CodeGen/X86/add-and-not.ll new file mode 100644 index 0000000..bf8e507 --- /dev/null +++ b/llvm/test/CodeGen/X86/add-and-not.ll @@ -0,0 +1,205 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s + +declare void @use(i8) + +define i8 @add_and_xor(i8 %x, i8 %y) { +; CHECK-LABEL: add_and_xor: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq + %xor = xor i8 %x, -1 + %and = and i8 %xor, %y + %add = add i8 %and, %x + ret i8 %add +} + +define i8 @add_and_xor_wrong_const(i8 %x, i8 %y) { +; CHECK-LABEL: add_and_xor_wrong_const: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: xorb $-2, %al +; CHECK-NEXT: andb %sil, %al +; CHECK-NEXT: addb %dil, %al +; CHECK-NEXT: retq + %xor = xor i8 %x, -2 + %and = and i8 %xor, %y + %add = add i8 %and, %x + ret i8 %add +} + +define i8 @add_and_xor_wrong_op(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: add_and_xor_wrong_op: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: notb %dl +; CHECK-NEXT: andb %sil, %dl +; CHECK-NEXT: leal (%rdx,%rdi), %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq + %xor = xor i8 %z, -1 + %and = and i8 %xor, %y + %add = add i8 %and, %x + ret i8 %add +} + +define i8 @add_and_xor_commuted1(i8 %x, i8 %y) { +; CHECK-LABEL: add_and_xor_commuted1: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq + %xor = xor i8 %x, -1 + %and = and i8 %y, %xor + %add = add i8 %and, %x + ret i8 %add +} + +define i8 @add_and_xor_commuted2(i8 %x, i8 %y) { +; CHECK-LABEL: add_and_xor_commuted2: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq + %xor = xor i8 %x, -1 + %and = and i8 %xor, %y + %add = add i8 %x, %and + ret i8 %add +} + +define i8 @add_and_xor_commuted3(i8 %x, i8 %y) { +; CHECK-LABEL: add_and_xor_commuted3: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq + %xor = xor i8 %x, -1 + %and = and i8 %y, %xor + %add = add i8 %x, %and + ret i8 %add +} + +define i8 @add_and_xor_extra_use(i8 %x, i8 %y) nounwind { +; CHECK-LABEL: add_and_xor_extra_use: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: movl %r14d, %eax +; CHECK-NEXT: notb %al +; CHECK-NEXT: movzbl %al, %ebp +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq use@PLT +; CHECK-NEXT: andb %bl, %bpl +; CHECK-NEXT: movzbl %bpl, %edi +; CHECK-NEXT: callq use@PLT +; CHECK-NEXT: orb %r14b, %bl +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + %xor = xor i8 %x, -1 + call void @use(i8 %xor) + %and = and i8 %xor, %y + call void @use(i8 %and) + %add = add i8 %and, %x + ret i8 %add +} + +define i64 @add_and_xor_const(i64 %x) { +; CHECK-LABEL: add_and_xor_const: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: retq + %xor = xor i64 %x, -1 + %and = and i64 %xor, 1 + %add = add i64 %and, %x + ret i64 %add +} + +define i64 @add_and_xor_const_wrong_op(i64 %x, i64 %y) { +; CHECK-LABEL: add_and_xor_const_wrong_op: +; CHECK: # %bb.0: +; CHECK-NEXT: notl %esi +; CHECK-NEXT: andl $1, %esi +; CHECK-NEXT: leaq (%rsi,%rdi), %rax +; CHECK-NEXT: retq + %xor = xor i64 %y, -1 + %and = and i64 %xor, 1 + %add = add i64 %and, %x + ret i64 %add +} + +define i64 @add_and_xor_const_explicit_trunc(i64 %x) { +; CHECK-LABEL: add_and_xor_const_explicit_trunc: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: retq + %trunc = trunc i64 %x to i32 + %xor = xor i32 %trunc, -1 + %ext = sext i32 %xor to i64 + %and = and i64 %ext, 1 + %add = add i64 %and, %x + ret i64 %add +} + +define i64 @add_and_xor_const_explicit_trunc_wrong_mask(i64 %x) { +; CHECK-LABEL: add_and_xor_const_explicit_trunc_wrong_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: movslq %eax, %rcx +; CHECK-NEXT: movabsq $4294967297, %rax # imm = 0x100000001 +; CHECK-NEXT: andq %rcx, %rax +; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: retq + %trunc = trunc i64 %x to i32 + %xor = xor i32 %trunc, -1 + %ext = sext i32 %xor to i64 + %and = and i64 %ext, 4294967297 + %add = add i64 %and, %x + ret i64 %add +} + +define i8* @gep_and_xor(i8* %a, i64 %m) { +; CHECK-LABEL: gep_and_xor: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: retq + %old = ptrtoint i8* %a to i64 + %old.not = and i64 %old, %m + %offset = xor i64 %old.not, %m + %p = getelementptr i8, i8* %a, i64 %offset + ret i8* %p +} + +define i8* @gep_and_xor_const(i8* %a) { +; CHECK-LABEL: gep_and_xor_const: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: retq + %old = ptrtoint i8* %a to i64 + %old.not = and i64 %old, 1 + %offset = xor i64 %old.not, 1 + %p = getelementptr i8, i8* %a, i64 %offset + ret i8* %p +} diff --git a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll index d081a2e..d04819a 100644 --- a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 ; The mask is all-ones, potentially shifted. @@ -11,12 +11,12 @@ ; lshr define i8 @test_i8_7_mask_lshr_1(i8 %a0) { -; X32-LABEL: test_i8_7_mask_lshr_1: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $6, %al -; X32-NEXT: shrb %al -; X32-NEXT: retl +; X86-LABEL: test_i8_7_mask_lshr_1: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $6, %al +; X86-NEXT: shrb %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_lshr_1: ; X64: # %bb.0: @@ -31,12 +31,12 @@ define i8 @test_i8_7_mask_lshr_1(i8 %a0) { } define i8 @test_i8_28_mask_lshr_1(i8 %a0) { -; X32-LABEL: test_i8_28_mask_lshr_1: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $28, %al -; X32-NEXT: shrb %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_lshr_1: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $28, %al +; X86-NEXT: shrb %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_1: ; X64: # %bb.0: @@ -50,12 +50,12 @@ define i8 @test_i8_28_mask_lshr_1(i8 %a0) { ret i8 %t1 } define i8 @test_i8_28_mask_lshr_2(i8 %a0) { -; X32-LABEL: test_i8_28_mask_lshr_2: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $28, %al -; X32-NEXT: shrb $2, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_lshr_2: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $28, %al +; X86-NEXT: shrb $2, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_2: ; X64: # %bb.0: @@ -69,12 +69,12 @@ define i8 @test_i8_28_mask_lshr_2(i8 %a0) { ret i8 %t1 } define i8 @test_i8_28_mask_lshr_3(i8 %a0) { -; X32-LABEL: test_i8_28_mask_lshr_3: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $24, %al -; X32-NEXT: shrb $3, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_lshr_3: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $24, %al +; X86-NEXT: shrb $3, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_3: ; X64: # %bb.0: @@ -88,12 +88,12 @@ define i8 @test_i8_28_mask_lshr_3(i8 %a0) { ret i8 %t1 } define i8 @test_i8_28_mask_lshr_4(i8 %a0) { -; X32-LABEL: test_i8_28_mask_lshr_4: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $16, %al -; X32-NEXT: shrb $4, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_lshr_4: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $16, %al +; X86-NEXT: shrb $4, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_4: ; X64: # %bb.0: @@ -108,12 +108,12 @@ define i8 @test_i8_28_mask_lshr_4(i8 %a0) { } define i8 @test_i8_224_mask_lshr_1(i8 %a0) { -; X32-LABEL: test_i8_224_mask_lshr_1: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $-32, %al -; X32-NEXT: shrb %al -; X32-NEXT: retl +; X86-LABEL: test_i8_224_mask_lshr_1: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $-32, %al +; X86-NEXT: shrb %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_lshr_1: ; X64: # %bb.0: @@ -127,12 +127,12 @@ define i8 @test_i8_224_mask_lshr_1(i8 %a0) { ret i8 %t1 } define i8 @test_i8_224_mask_lshr_4(i8 %a0) { -; X32-LABEL: test_i8_224_mask_lshr_4: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $-32, %al -; X32-NEXT: shrb $4, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_224_mask_lshr_4: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $-32, %al +; X86-NEXT: shrb $4, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_lshr_4: ; X64: # %bb.0: @@ -146,11 +146,11 @@ define i8 @test_i8_224_mask_lshr_4(i8 %a0) { ret i8 %t1 } define i8 @test_i8_224_mask_lshr_5(i8 %a0) { -; X32-LABEL: test_i8_224_mask_lshr_5: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: shrb $5, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_224_mask_lshr_5: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shrb $5, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_lshr_5: ; X64: # %bb.0: @@ -163,11 +163,11 @@ define i8 @test_i8_224_mask_lshr_5(i8 %a0) { ret i8 %t1 } define i8 @test_i8_224_mask_lshr_6(i8 %a0) { -; X32-LABEL: test_i8_224_mask_lshr_6: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: shrb $6, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_224_mask_lshr_6: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shrb $6, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_lshr_6: ; X64: # %bb.0: @@ -183,12 +183,12 @@ define i8 @test_i8_224_mask_lshr_6(i8 %a0) { ; ashr define i8 @test_i8_7_mask_ashr_1(i8 %a0) { -; X32-LABEL: test_i8_7_mask_ashr_1: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $6, %al -; X32-NEXT: shrb %al -; X32-NEXT: retl +; X86-LABEL: test_i8_7_mask_ashr_1: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $6, %al +; X86-NEXT: shrb %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_ashr_1: ; X64: # %bb.0: @@ -203,12 +203,12 @@ define i8 @test_i8_7_mask_ashr_1(i8 %a0) { } define i8 @test_i8_28_mask_ashr_1(i8 %a0) { -; X32-LABEL: test_i8_28_mask_ashr_1: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $28, %al -; X32-NEXT: shrb %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_ashr_1: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $28, %al +; X86-NEXT: shrb %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_1: ; X64: # %bb.0: @@ -222,12 +222,12 @@ define i8 @test_i8_28_mask_ashr_1(i8 %a0) { ret i8 %t1 } define i8 @test_i8_28_mask_ashr_2(i8 %a0) { -; X32-LABEL: test_i8_28_mask_ashr_2: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $28, %al -; X32-NEXT: shrb $2, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_ashr_2: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $28, %al +; X86-NEXT: shrb $2, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_2: ; X64: # %bb.0: @@ -241,12 +241,12 @@ define i8 @test_i8_28_mask_ashr_2(i8 %a0) { ret i8 %t1 } define i8 @test_i8_28_mask_ashr_3(i8 %a0) { -; X32-LABEL: test_i8_28_mask_ashr_3: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $24, %al -; X32-NEXT: shrb $3, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_ashr_3: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $24, %al +; X86-NEXT: shrb $3, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_3: ; X64: # %bb.0: @@ -260,12 +260,12 @@ define i8 @test_i8_28_mask_ashr_3(i8 %a0) { ret i8 %t1 } define i8 @test_i8_28_mask_ashr_4(i8 %a0) { -; X32-LABEL: test_i8_28_mask_ashr_4: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $16, %al -; X32-NEXT: shrb $4, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_ashr_4: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $16, %al +; X86-NEXT: shrb $4, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_4: ; X64: # %bb.0: @@ -280,12 +280,12 @@ define i8 @test_i8_28_mask_ashr_4(i8 %a0) { } define i8 @test_i8_224_mask_ashr_1(i8 %a0) { -; X32-LABEL: test_i8_224_mask_ashr_1: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $-32, %al -; X32-NEXT: sarb %al -; X32-NEXT: retl +; X86-LABEL: test_i8_224_mask_ashr_1: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $-32, %al +; X86-NEXT: sarb %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_ashr_1: ; X64: # %bb.0: @@ -299,12 +299,12 @@ define i8 @test_i8_224_mask_ashr_1(i8 %a0) { ret i8 %t1 } define i8 @test_i8_224_mask_ashr_4(i8 %a0) { -; X32-LABEL: test_i8_224_mask_ashr_4: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $-32, %al -; X32-NEXT: sarb $4, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_224_mask_ashr_4: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $-32, %al +; X86-NEXT: sarb $4, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_ashr_4: ; X64: # %bb.0: @@ -318,11 +318,11 @@ define i8 @test_i8_224_mask_ashr_4(i8 %a0) { ret i8 %t1 } define i8 @test_i8_224_mask_ashr_5(i8 %a0) { -; X32-LABEL: test_i8_224_mask_ashr_5: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: sarb $5, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_224_mask_ashr_5: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: sarb $5, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_ashr_5: ; X64: # %bb.0: @@ -335,11 +335,11 @@ define i8 @test_i8_224_mask_ashr_5(i8 %a0) { ret i8 %t1 } define i8 @test_i8_224_mask_ashr_6(i8 %a0) { -; X32-LABEL: test_i8_224_mask_ashr_6: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: sarb $6, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_224_mask_ashr_6: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: sarb $6, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_ashr_6: ; X64: # %bb.0: @@ -355,12 +355,12 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) { ; shl define i8 @test_i8_7_mask_shl_1(i8 %a0) { -; X32-LABEL: test_i8_7_mask_shl_1: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $7, %al -; X32-NEXT: addb %al, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_7_mask_shl_1: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $7, %al +; X86-NEXT: addb %al, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_shl_1: ; X64: # %bb.0: @@ -374,12 +374,12 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) { ret i8 %t1 } define i8 @test_i8_7_mask_shl_4(i8 %a0) { -; X32-LABEL: test_i8_7_mask_shl_4: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $7, %al -; X32-NEXT: shlb $4, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_7_mask_shl_4: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $7, %al +; X86-NEXT: shlb $4, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_shl_4: ; X64: # %bb.0: @@ -393,11 +393,11 @@ define i8 @test_i8_7_mask_shl_4(i8 %a0) { ret i8 %t1 } define i8 @test_i8_7_mask_shl_5(i8 %a0) { -; X32-LABEL: test_i8_7_mask_shl_5: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: shlb $5, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_7_mask_shl_5: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb $5, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_shl_5: ; X64: # %bb.0: @@ -410,11 +410,11 @@ define i8 @test_i8_7_mask_shl_5(i8 %a0) { ret i8 %t1 } define i8 @test_i8_7_mask_shl_6(i8 %a0) { -; X32-LABEL: test_i8_7_mask_shl_6: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: shlb $6, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_7_mask_shl_6: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb $6, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_shl_6: ; X64: # %bb.0: @@ -428,12 +428,12 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) { } define i8 @test_i8_28_mask_shl_1(i8 %a0) { -; X32-LABEL: test_i8_28_mask_shl_1: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $28, %al -; X32-NEXT: addb %al, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_shl_1: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $28, %al +; X86-NEXT: addb %al, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_shl_1: ; X64: # %bb.0: @@ -447,12 +447,12 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) { ret i8 %t1 } define i8 @test_i8_28_mask_shl_2(i8 %a0) { -; X32-LABEL: test_i8_28_mask_shl_2: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $28, %al -; X32-NEXT: shlb $2, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_shl_2: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $28, %al +; X86-NEXT: shlb $2, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_shl_2: ; X64: # %bb.0: @@ -466,12 +466,12 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) { ret i8 %t1 } define i8 @test_i8_28_mask_shl_3(i8 %a0) { -; X32-LABEL: test_i8_28_mask_shl_3: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $28, %al -; X32-NEXT: shlb $3, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_shl_3: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $28, %al +; X86-NEXT: shlb $3, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_shl_3: ; X64: # %bb.0: @@ -485,12 +485,12 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) { ret i8 %t1 } define i8 @test_i8_28_mask_shl_4(i8 %a0) { -; X32-LABEL: test_i8_28_mask_shl_4: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $12, %al -; X32-NEXT: shlb $4, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_28_mask_shl_4: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $12, %al +; X86-NEXT: shlb $4, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_shl_4: ; X64: # %bb.0: @@ -505,12 +505,12 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) { } define i8 @test_i8_224_mask_shl_1(i8 %a0) { -; X32-LABEL: test_i8_224_mask_shl_1: -; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: andb $96, %al -; X32-NEXT: addb %al, %al -; X32-NEXT: retl +; X86-LABEL: test_i8_224_mask_shl_1: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $96, %al +; X86-NEXT: addb %al, %al +; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_shl_1: ; X64: # %bb.0: @@ -531,13 +531,13 @@ define i8 @test_i8_224_mask_shl_1(i8 %a0) { ; lshr define i16 @test_i16_127_mask_lshr_1(i16 %a0) { -; X32-LABEL: test_i16_127_mask_lshr_1: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $126, %eax -; X32-NEXT: shrl %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_127_mask_lshr_1: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $126, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_lshr_1: ; X64: # %bb.0: @@ -552,13 +552,13 @@ define i16 @test_i16_127_mask_lshr_1(i16 %a0) { } define i16 @test_i16_2032_mask_lshr_3(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_lshr_3: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $2032, %eax # imm = 0x7F0 -; X32-NEXT: shrl $3, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_lshr_3: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $2032, %eax # imm = 0x7F0 +; X86-NEXT: shrl $3, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_lshr_3: ; X64: # %bb.0: @@ -572,13 +572,13 @@ define i16 @test_i16_2032_mask_lshr_3(i16 %a0) { ret i16 %t1 } define i16 @test_i16_2032_mask_lshr_4(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_lshr_4: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $4, %eax -; X32-NEXT: andl $127, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_lshr_4: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $4, %eax +; X86-NEXT: andl $127, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_lshr_4: ; X64: # %bb.0: @@ -592,13 +592,13 @@ define i16 @test_i16_2032_mask_lshr_4(i16 %a0) { ret i16 %t1 } define i16 @test_i16_2032_mask_lshr_5(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_lshr_5: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $5, %eax -; X32-NEXT: andl $63, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_lshr_5: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $5, %eax +; X86-NEXT: andl $63, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_lshr_5: ; X64: # %bb.0: @@ -612,13 +612,13 @@ define i16 @test_i16_2032_mask_lshr_5(i16 %a0) { ret i16 %t1 } define i16 @test_i16_2032_mask_lshr_6(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_lshr_6: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $6, %eax -; X32-NEXT: andl $31, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_lshr_6: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $6, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_lshr_6: ; X64: # %bb.0: @@ -633,13 +633,13 @@ define i16 @test_i16_2032_mask_lshr_6(i16 %a0) { } define i16 @test_i16_65024_mask_lshr_1(i16 %a0) { -; X32-LABEL: test_i16_65024_mask_lshr_1: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $65024, %eax # imm = 0xFE00 -; X32-NEXT: shrl %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_65024_mask_lshr_1: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $65024, %eax # imm = 0xFE00 +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_lshr_1: ; X64: # %bb.0: @@ -653,13 +653,13 @@ define i16 @test_i16_65024_mask_lshr_1(i16 %a0) { ret i16 %t1 } define i16 @test_i16_65024_mask_lshr_8(i16 %a0) { -; X32-LABEL: test_i16_65024_mask_lshr_8: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $65024, %eax # imm = 0xFE00 -; X32-NEXT: shrl $8, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_65024_mask_lshr_8: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $65024, %eax # imm = 0xFE00 +; X86-NEXT: shrl $8, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_lshr_8: ; X64: # %bb.0: @@ -673,12 +673,12 @@ define i16 @test_i16_65024_mask_lshr_8(i16 %a0) { ret i16 %t1 } define i16 @test_i16_65024_mask_lshr_9(i16 %a0) { -; X32-LABEL: test_i16_65024_mask_lshr_9: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $9, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_65024_mask_lshr_9: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $9, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_lshr_9: ; X64: # %bb.0: @@ -691,12 +691,12 @@ define i16 @test_i16_65024_mask_lshr_9(i16 %a0) { ret i16 %t1 } define i16 @test_i16_65024_mask_lshr_10(i16 %a0) { -; X32-LABEL: test_i16_65024_mask_lshr_10: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $10, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_65024_mask_lshr_10: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $10, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_lshr_10: ; X64: # %bb.0: @@ -712,13 +712,13 @@ define i16 @test_i16_65024_mask_lshr_10(i16 %a0) { ; ashr define i16 @test_i16_127_mask_ashr_1(i16 %a0) { -; X32-LABEL: test_i16_127_mask_ashr_1: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $126, %eax -; X32-NEXT: shrl %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_127_mask_ashr_1: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $126, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_ashr_1: ; X64: # %bb.0: @@ -733,13 +733,13 @@ define i16 @test_i16_127_mask_ashr_1(i16 %a0) { } define i16 @test_i16_2032_mask_ashr_3(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_ashr_3: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $2032, %eax # imm = 0x7F0 -; X32-NEXT: shrl $3, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_ashr_3: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $2032, %eax # imm = 0x7F0 +; X86-NEXT: shrl $3, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_ashr_3: ; X64: # %bb.0: @@ -753,13 +753,13 @@ define i16 @test_i16_2032_mask_ashr_3(i16 %a0) { ret i16 %t1 } define i16 @test_i16_2032_mask_ashr_4(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_ashr_4: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $4, %eax -; X32-NEXT: andl $127, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_ashr_4: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $4, %eax +; X86-NEXT: andl $127, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_ashr_4: ; X64: # %bb.0: @@ -773,13 +773,13 @@ define i16 @test_i16_2032_mask_ashr_4(i16 %a0) { ret i16 %t1 } define i16 @test_i16_2032_mask_ashr_5(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_ashr_5: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $5, %eax -; X32-NEXT: andl $63, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_ashr_5: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $5, %eax +; X86-NEXT: andl $63, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_ashr_5: ; X64: # %bb.0: @@ -793,13 +793,13 @@ define i16 @test_i16_2032_mask_ashr_5(i16 %a0) { ret i16 %t1 } define i16 @test_i16_2032_mask_ashr_6(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_ashr_6: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $6, %eax -; X32-NEXT: andl $31, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_ashr_6: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $6, %eax +; X86-NEXT: andl $31, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_ashr_6: ; X64: # %bb.0: @@ -814,14 +814,14 @@ define i16 @test_i16_2032_mask_ashr_6(i16 %a0) { } define i16 @test_i16_65024_mask_ashr_1(i16 %a0) { -; X32-LABEL: test_i16_65024_mask_ashr_1: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $65024, %eax # imm = 0xFE00 -; X32-NEXT: cwtl -; X32-NEXT: shrl %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_65024_mask_ashr_1: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $65024, %eax # imm = 0xFE00 +; X86-NEXT: cwtl +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_ashr_1: ; X64: # %bb.0: @@ -835,14 +835,14 @@ define i16 @test_i16_65024_mask_ashr_1(i16 %a0) { ret i16 %t1 } define i16 @test_i16_65024_mask_ashr_8(i16 %a0) { -; X32-LABEL: test_i16_65024_mask_ashr_8: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $65024, %eax # imm = 0xFE00 -; X32-NEXT: cwtl -; X32-NEXT: shrl $8, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_65024_mask_ashr_8: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $65024, %eax # imm = 0xFE00 +; X86-NEXT: cwtl +; X86-NEXT: shrl $8, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_ashr_8: ; X64: # %bb.0: @@ -856,12 +856,12 @@ define i16 @test_i16_65024_mask_ashr_8(i16 %a0) { ret i16 %t1 } define i16 @test_i16_65024_mask_ashr_9(i16 %a0) { -; X32-LABEL: test_i16_65024_mask_ashr_9: -; X32: # %bb.0: -; X32-NEXT: movswl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $9, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_65024_mask_ashr_9: +; X86: # %bb.0: +; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $9, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_ashr_9: ; X64: # %bb.0: @@ -874,12 +874,12 @@ define i16 @test_i16_65024_mask_ashr_9(i16 %a0) { ret i16 %t1 } define i16 @test_i16_65024_mask_ashr_10(i16 %a0) { -; X32-LABEL: test_i16_65024_mask_ashr_10: -; X32: # %bb.0: -; X32-NEXT: movswl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $10, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_65024_mask_ashr_10: +; X86: # %bb.0: +; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $10, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_ashr_10: ; X64: # %bb.0: @@ -895,13 +895,13 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) { ; shl define i16 @test_i16_127_mask_shl_1(i16 %a0) { -; X32-LABEL: test_i16_127_mask_shl_1: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $127, %eax -; X32-NEXT: addl %eax, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_127_mask_shl_1: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $127, %eax +; X86-NEXT: addl %eax, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_shl_1: ; X64: # %bb.0: @@ -915,13 +915,13 @@ define i16 @test_i16_127_mask_shl_1(i16 %a0) { ret i16 %t1 } define i16 @test_i16_127_mask_shl_8(i16 %a0) { -; X32-LABEL: test_i16_127_mask_shl_8: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $127, %eax -; X32-NEXT: shll $8, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_127_mask_shl_8: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $127, %eax +; X86-NEXT: shll $8, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_shl_8: ; X64: # %bb.0: @@ -935,12 +935,12 @@ define i16 @test_i16_127_mask_shl_8(i16 %a0) { ret i16 %t1 } define i16 @test_i16_127_mask_shl_9(i16 %a0) { -; X32-LABEL: test_i16_127_mask_shl_9: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $9, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_127_mask_shl_9: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $9, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_shl_9: ; X64: # %bb.0: @@ -953,12 +953,12 @@ define i16 @test_i16_127_mask_shl_9(i16 %a0) { ret i16 %t1 } define i16 @test_i16_127_mask_shl_10(i16 %a0) { -; X32-LABEL: test_i16_127_mask_shl_10: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $10, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_127_mask_shl_10: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $10, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_shl_10: ; X64: # %bb.0: @@ -972,13 +972,13 @@ define i16 @test_i16_127_mask_shl_10(i16 %a0) { } define i16 @test_i16_2032_mask_shl_3(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_shl_3: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $2032, %eax # imm = 0x7F0 -; X32-NEXT: shll $3, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_shl_3: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $2032, %eax # imm = 0x7F0 +; X86-NEXT: shll $3, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_shl_3: ; X64: # %bb.0: @@ -992,13 +992,13 @@ define i16 @test_i16_2032_mask_shl_3(i16 %a0) { ret i16 %t1 } define i16 @test_i16_2032_mask_shl_4(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_shl_4: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $2032, %eax # imm = 0x7F0 -; X32-NEXT: shll $4, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_shl_4: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $2032, %eax # imm = 0x7F0 +; X86-NEXT: shll $4, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_shl_4: ; X64: # %bb.0: @@ -1012,13 +1012,13 @@ define i16 @test_i16_2032_mask_shl_4(i16 %a0) { ret i16 %t1 } define i16 @test_i16_2032_mask_shl_5(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_shl_5: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $2032, %eax # imm = 0x7F0 -; X32-NEXT: shll $5, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_shl_5: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $2032, %eax # imm = 0x7F0 +; X86-NEXT: shll $5, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_shl_5: ; X64: # %bb.0: @@ -1032,13 +1032,13 @@ define i16 @test_i16_2032_mask_shl_5(i16 %a0) { ret i16 %t1 } define i16 @test_i16_2032_mask_shl_6(i16 %a0) { -; X32-LABEL: test_i16_2032_mask_shl_6: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $1008, %eax # imm = 0x3F0 -; X32-NEXT: shll $6, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_2032_mask_shl_6: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $1008, %eax # imm = 0x3F0 +; X86-NEXT: shll $6, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_shl_6: ; X64: # %bb.0: @@ -1053,13 +1053,13 @@ define i16 @test_i16_2032_mask_shl_6(i16 %a0) { } define i16 @test_i16_65024_mask_shl_1(i16 %a0) { -; X32-LABEL: test_i16_65024_mask_shl_1: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $32256, %eax # imm = 0x7E00 -; X32-NEXT: addl %eax, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax -; X32-NEXT: retl +; X86-LABEL: test_i16_65024_mask_shl_1: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $32256, %eax # imm = 0x7E00 +; X86-NEXT: addl %eax, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_shl_1: ; X64: # %bb.0: @@ -1080,12 +1080,12 @@ define i16 @test_i16_65024_mask_shl_1(i16 %a0) { ; lshr define i32 @test_i32_32767_mask_lshr_1(i32 %a0) { -; X32-LABEL: test_i32_32767_mask_lshr_1: -; X32: # %bb.0: -; X32-NEXT: movl $32766, %eax # imm = 0x7FFE -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_32767_mask_lshr_1: +; X86: # %bb.0: +; X86-NEXT: movl $32766, %eax # imm = 0x7FFE +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_lshr_1: ; X64: # %bb.0: @@ -1099,12 +1099,12 @@ define i32 @test_i32_32767_mask_lshr_1(i32 %a0) { } define i32 @test_i32_8388352_mask_lshr_7(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_lshr_7: -; X32: # %bb.0: -; X32-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $7, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_lshr_7: +; X86: # %bb.0: +; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $7, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_7: ; X64: # %bb.0: @@ -1117,12 +1117,12 @@ define i32 @test_i32_8388352_mask_lshr_7(i32 %a0) { ret i32 %t1 } define i32 @test_i32_8388352_mask_lshr_8(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_lshr_8: -; X32: # %bb.0: -; X32-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $8, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_lshr_8: +; X86: # %bb.0: +; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $8, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_8: ; X64: # %bb.0: @@ -1135,12 +1135,12 @@ define i32 @test_i32_8388352_mask_lshr_8(i32 %a0) { ret i32 %t1 } define i32 @test_i32_8388352_mask_lshr_9(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_lshr_9: -; X32: # %bb.0: -; X32-NEXT: movl $8388096, %eax # imm = 0x7FFE00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $9, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_lshr_9: +; X86: # %bb.0: +; X86-NEXT: movl $8388096, %eax # imm = 0x7FFE00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $9, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_9: ; X64: # %bb.0: @@ -1153,12 +1153,12 @@ define i32 @test_i32_8388352_mask_lshr_9(i32 %a0) { ret i32 %t1 } define i32 @test_i32_8388352_mask_lshr_10(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_lshr_10: -; X32: # %bb.0: -; X32-NEXT: movl $8387584, %eax # imm = 0x7FFC00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $10, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_lshr_10: +; X86: # %bb.0: +; X86-NEXT: movl $8387584, %eax # imm = 0x7FFC00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $10, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_10: ; X64: # %bb.0: @@ -1172,12 +1172,12 @@ define i32 @test_i32_8388352_mask_lshr_10(i32 %a0) { } define i32 @test_i32_4294836224_mask_lshr_1(i32 %a0) { -; X32-LABEL: test_i32_4294836224_mask_lshr_1: -; X32: # %bb.0: -; X32-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_4294836224_mask_lshr_1: +; X86: # %bb.0: +; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_lshr_1: ; X64: # %bb.0: @@ -1190,12 +1190,12 @@ define i32 @test_i32_4294836224_mask_lshr_1(i32 %a0) { ret i32 %t1 } define i32 @test_i32_4294836224_mask_lshr_16(i32 %a0) { -; X32-LABEL: test_i32_4294836224_mask_lshr_16: -; X32: # %bb.0: -; X32-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $16, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_4294836224_mask_lshr_16: +; X86: # %bb.0: +; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $16, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_lshr_16: ; X64: # %bb.0: @@ -1208,11 +1208,11 @@ define i32 @test_i32_4294836224_mask_lshr_16(i32 %a0) { ret i32 %t1 } define i32 @test_i32_4294836224_mask_lshr_17(i32 %a0) { -; X32-LABEL: test_i32_4294836224_mask_lshr_17: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $17, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_4294836224_mask_lshr_17: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $17, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_lshr_17: ; X64: # %bb.0: @@ -1224,11 +1224,11 @@ define i32 @test_i32_4294836224_mask_lshr_17(i32 %a0) { ret i32 %t1 } define i32 @test_i32_4294836224_mask_lshr_18(i32 %a0) { -; X32-LABEL: test_i32_4294836224_mask_lshr_18: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $18, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_4294836224_mask_lshr_18: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $18, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_lshr_18: ; X64: # %bb.0: @@ -1243,12 +1243,12 @@ define i32 @test_i32_4294836224_mask_lshr_18(i32 %a0) { ; ashr define i32 @test_i32_32767_mask_ashr_1(i32 %a0) { -; X32-LABEL: test_i32_32767_mask_ashr_1: -; X32: # %bb.0: -; X32-NEXT: movl $32766, %eax # imm = 0x7FFE -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_32767_mask_ashr_1: +; X86: # %bb.0: +; X86-NEXT: movl $32766, %eax # imm = 0x7FFE +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_ashr_1: ; X64: # %bb.0: @@ -1262,12 +1262,12 @@ define i32 @test_i32_32767_mask_ashr_1(i32 %a0) { } define i32 @test_i32_8388352_mask_ashr_7(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_ashr_7: -; X32: # %bb.0: -; X32-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $7, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_ashr_7: +; X86: # %bb.0: +; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $7, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_7: ; X64: # %bb.0: @@ -1280,12 +1280,12 @@ define i32 @test_i32_8388352_mask_ashr_7(i32 %a0) { ret i32 %t1 } define i32 @test_i32_8388352_mask_ashr_8(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_ashr_8: -; X32: # %bb.0: -; X32-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $8, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_ashr_8: +; X86: # %bb.0: +; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $8, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_8: ; X64: # %bb.0: @@ -1298,12 +1298,12 @@ define i32 @test_i32_8388352_mask_ashr_8(i32 %a0) { ret i32 %t1 } define i32 @test_i32_8388352_mask_ashr_9(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_ashr_9: -; X32: # %bb.0: -; X32-NEXT: movl $8388096, %eax # imm = 0x7FFE00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $9, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_ashr_9: +; X86: # %bb.0: +; X86-NEXT: movl $8388096, %eax # imm = 0x7FFE00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $9, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_9: ; X64: # %bb.0: @@ -1316,12 +1316,12 @@ define i32 @test_i32_8388352_mask_ashr_9(i32 %a0) { ret i32 %t1 } define i32 @test_i32_8388352_mask_ashr_10(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_ashr_10: -; X32: # %bb.0: -; X32-NEXT: movl $8387584, %eax # imm = 0x7FFC00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $10, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_ashr_10: +; X86: # %bb.0: +; X86-NEXT: movl $8387584, %eax # imm = 0x7FFC00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $10, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_10: ; X64: # %bb.0: @@ -1335,12 +1335,12 @@ define i32 @test_i32_8388352_mask_ashr_10(i32 %a0) { } define i32 @test_i32_4294836224_mask_ashr_1(i32 %a0) { -; X32-LABEL: test_i32_4294836224_mask_ashr_1: -; X32: # %bb.0: -; X32-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: sarl %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_4294836224_mask_ashr_1: +; X86: # %bb.0: +; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sarl %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_ashr_1: ; X64: # %bb.0: @@ -1353,12 +1353,12 @@ define i32 @test_i32_4294836224_mask_ashr_1(i32 %a0) { ret i32 %t1 } define i32 @test_i32_4294836224_mask_ashr_16(i32 %a0) { -; X32-LABEL: test_i32_4294836224_mask_ashr_16: -; X32: # %bb.0: -; X32-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: sarl $16, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_4294836224_mask_ashr_16: +; X86: # %bb.0: +; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sarl $16, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_ashr_16: ; X64: # %bb.0: @@ -1371,11 +1371,11 @@ define i32 @test_i32_4294836224_mask_ashr_16(i32 %a0) { ret i32 %t1 } define i32 @test_i32_4294836224_mask_ashr_17(i32 %a0) { -; X32-LABEL: test_i32_4294836224_mask_ashr_17: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: sarl $17, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_4294836224_mask_ashr_17: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sarl $17, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_ashr_17: ; X64: # %bb.0: @@ -1387,11 +1387,11 @@ define i32 @test_i32_4294836224_mask_ashr_17(i32 %a0) { ret i32 %t1 } define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) { -; X32-LABEL: test_i32_4294836224_mask_ashr_18: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: sarl $18, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_4294836224_mask_ashr_18: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sarl $18, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_ashr_18: ; X64: # %bb.0: @@ -1406,12 +1406,12 @@ define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) { ; shl define i32 @test_i32_32767_mask_shl_1(i32 %a0) { -; X32-LABEL: test_i32_32767_mask_shl_1: -; X32: # %bb.0: -; X32-NEXT: movl $32767, %eax # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: addl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_32767_mask_shl_1: +; X86: # %bb.0: +; X86-NEXT: movl $32767, %eax # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_shl_1: ; X64: # %bb.0: @@ -1424,12 +1424,12 @@ define i32 @test_i32_32767_mask_shl_1(i32 %a0) { ret i32 %t1 } define i32 @test_i32_32767_mask_shl_16(i32 %a0) { -; X32-LABEL: test_i32_32767_mask_shl_16: -; X32: # %bb.0: -; X32-NEXT: movl $32767, %eax # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $16, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_32767_mask_shl_16: +; X86: # %bb.0: +; X86-NEXT: movl $32767, %eax # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $16, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_shl_16: ; X64: # %bb.0: @@ -1442,11 +1442,11 @@ define i32 @test_i32_32767_mask_shl_16(i32 %a0) { ret i32 %t1 } define i32 @test_i32_32767_mask_shl_17(i32 %a0) { -; X32-LABEL: test_i32_32767_mask_shl_17: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $17, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_32767_mask_shl_17: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $17, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_shl_17: ; X64: # %bb.0: @@ -1458,11 +1458,11 @@ define i32 @test_i32_32767_mask_shl_17(i32 %a0) { ret i32 %t1 } define i32 @test_i32_32767_mask_shl_18(i32 %a0) { -; X32-LABEL: test_i32_32767_mask_shl_18: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $18, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_32767_mask_shl_18: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $18, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_shl_18: ; X64: # %bb.0: @@ -1475,12 +1475,12 @@ define i32 @test_i32_32767_mask_shl_18(i32 %a0) { } define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_shl_7: -; X32: # %bb.0: -; X32-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $7, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_shl_7: +; X86: # %bb.0: +; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $7, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_shl_7: ; X64: # %bb.0: @@ -1493,12 +1493,12 @@ define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { ret i32 %t1 } define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_shl_8: -; X32: # %bb.0: -; X32-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $8, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_shl_8: +; X86: # %bb.0: +; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $8, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_shl_8: ; X64: # %bb.0: @@ -1511,12 +1511,12 @@ define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { ret i32 %t1 } define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_shl_9: -; X32: # %bb.0: -; X32-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $9, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_shl_9: +; X86: # %bb.0: +; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $9, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_shl_9: ; X64: # %bb.0: @@ -1529,12 +1529,12 @@ define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { ret i32 %t1 } define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { -; X32-LABEL: test_i32_8388352_mask_shl_10: -; X32: # %bb.0: -; X32-NEXT: movl $4194048, %eax # imm = 0x3FFF00 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $10, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_8388352_mask_shl_10: +; X86: # %bb.0: +; X86-NEXT: movl $4194048, %eax # imm = 0x3FFF00 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $10, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_shl_10: ; X64: # %bb.0: @@ -1548,12 +1548,12 @@ define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { } define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { -; X32-LABEL: test_i32_4294836224_mask_shl_1: -; X32: # %bb.0: -; X32-NEXT: movl $2147352576, %eax # imm = 0x7FFE0000 -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: addl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i32_4294836224_mask_shl_1: +; X86: # %bb.0: +; X86-NEXT: movl $2147352576, %eax # imm = 0x7FFE0000 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_shl_1: ; X64: # %bb.0: @@ -1573,13 +1573,13 @@ define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { ; lshr define i64 @test_i64_2147483647_mask_lshr_1(i64 %a0) { -; X32-LABEL: test_i64_2147483647_mask_lshr_1: -; X32: # %bb.0: -; X32-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_2147483647_mask_lshr_1: +; X86: # %bb.0: +; X86-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_2147483647_mask_lshr_1: ; X64: # %bb.0: @@ -1593,14 +1593,14 @@ define i64 @test_i64_2147483647_mask_lshr_1(i64 %a0) { } define i64 @test_i64_140737488289792_mask_lshr_15(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_lshr_15: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: shll $16, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $17, %ecx, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_lshr_15: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll $16, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shldl $17, %ecx, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_lshr_15: ; X64: # %bb.0: @@ -1613,14 +1613,14 @@ define i64 @test_i64_140737488289792_mask_lshr_15(i64 %a0) { ret i64 %t1 } define i64 @test_i64_140737488289792_mask_lshr_16(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_lshr_16: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl $32767, %eax # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $16, %ecx, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_lshr_16: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $32767, %eax # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shldl $16, %ecx, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_lshr_16: ; X64: # %bb.0: @@ -1633,14 +1633,14 @@ define i64 @test_i64_140737488289792_mask_lshr_16(i64 %a0) { ret i64 %t1 } define i64 @test_i64_140737488289792_mask_lshr_17(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_lshr_17: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl $32767, %eax # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $15, %ecx, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_lshr_17: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $32767, %eax # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shldl $15, %ecx, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_lshr_17: ; X64: # %bb.0: @@ -1653,14 +1653,14 @@ define i64 @test_i64_140737488289792_mask_lshr_17(i64 %a0) { ret i64 %t1 } define i64 @test_i64_140737488289792_mask_lshr_18(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_lshr_18: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl $32767, %eax # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $14, %ecx, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_lshr_18: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $32767, %eax # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shldl $14, %ecx, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_lshr_18: ; X64: # %bb.0: @@ -1674,12 +1674,12 @@ define i64 @test_i64_140737488289792_mask_lshr_18(i64 %a0) { } define i64 @test_i64_18446744065119617024_mask_lshr_1(i64 %a0) { -; X32-LABEL: test_i64_18446744065119617024_mask_lshr_1: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: shrl %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_18446744065119617024_mask_lshr_1: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shrl %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_1: ; X64: # %bb.0: @@ -1692,12 +1692,12 @@ define i64 @test_i64_18446744065119617024_mask_lshr_1(i64 %a0) { ret i64 %t1 } define i64 @test_i64_18446744065119617024_mask_lshr_32(i64 %a0) { -; X32-LABEL: test_i64_18446744065119617024_mask_lshr_32: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: andl $-2, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_18446744065119617024_mask_lshr_32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $-2, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_32: ; X64: # %bb.0: @@ -1710,12 +1710,12 @@ define i64 @test_i64_18446744065119617024_mask_lshr_32(i64 %a0) { ret i64 %t1 } define i64 @test_i64_18446744065119617024_mask_lshr_33(i64 %a0) { -; X32-LABEL: test_i64_18446744065119617024_mask_lshr_33: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_18446744065119617024_mask_lshr_33: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_33: ; X64: # %bb.0: @@ -1727,12 +1727,12 @@ define i64 @test_i64_18446744065119617024_mask_lshr_33(i64 %a0) { ret i64 %t1 } define i64 @test_i64_18446744065119617024_mask_lshr_34(i64 %a0) { -; X32-LABEL: test_i64_18446744065119617024_mask_lshr_34: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl $2, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_18446744065119617024_mask_lshr_34: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $2, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_34: ; X64: # %bb.0: @@ -1747,13 +1747,13 @@ define i64 @test_i64_18446744065119617024_mask_lshr_34(i64 %a0) { ; ashr define i64 @test_i64_2147483647_mask_ashr_1(i64 %a0) { -; X32-LABEL: test_i64_2147483647_mask_ashr_1: -; X32: # %bb.0: -; X32-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shrl %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_2147483647_mask_ashr_1: +; X86: # %bb.0: +; X86-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_2147483647_mask_ashr_1: ; X64: # %bb.0: @@ -1767,14 +1767,14 @@ define i64 @test_i64_2147483647_mask_ashr_1(i64 %a0) { } define i64 @test_i64_140737488289792_mask_ashr_15(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_ashr_15: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: shll $16, %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $17, %ecx, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_ashr_15: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll $16, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shldl $17, %ecx, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_ashr_15: ; X64: # %bb.0: @@ -1787,14 +1787,14 @@ define i64 @test_i64_140737488289792_mask_ashr_15(i64 %a0) { ret i64 %t1 } define i64 @test_i64_140737488289792_mask_ashr_16(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_ashr_16: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl $32767, %eax # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $16, %ecx, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_ashr_16: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $32767, %eax # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shldl $16, %ecx, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_ashr_16: ; X64: # %bb.0: @@ -1807,14 +1807,14 @@ define i64 @test_i64_140737488289792_mask_ashr_16(i64 %a0) { ret i64 %t1 } define i64 @test_i64_140737488289792_mask_ashr_17(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_ashr_17: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl $32767, %eax # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $15, %ecx, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_ashr_17: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $32767, %eax # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shldl $15, %ecx, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_ashr_17: ; X64: # %bb.0: @@ -1827,14 +1827,14 @@ define i64 @test_i64_140737488289792_mask_ashr_17(i64 %a0) { ret i64 %t1 } define i64 @test_i64_140737488289792_mask_ashr_18(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_ashr_18: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl $32767, %eax # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $14, %ecx, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_ashr_18: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $32767, %eax # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shldl $14, %ecx, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_ashr_18: ; X64: # %bb.0: @@ -1848,12 +1848,12 @@ define i64 @test_i64_140737488289792_mask_ashr_18(i64 %a0) { } define i64 @test_i64_18446744065119617024_mask_ashr_1(i64 %a0) { -; X32-LABEL: test_i64_18446744065119617024_mask_ashr_1: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: sarl %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_18446744065119617024_mask_ashr_1: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: sarl %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_ashr_1: ; X64: # %bb.0: @@ -1866,13 +1866,13 @@ define i64 @test_i64_18446744065119617024_mask_ashr_1(i64 %a0) { ret i64 %t1 } define i64 @test_i64_18446744065119617024_mask_ashr_32(i64 %a0) { -; X32-LABEL: test_i64_18446744065119617024_mask_ashr_32: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl %edx, %eax -; X32-NEXT: andl $-2, %eax -; X32-NEXT: sarl $31, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_18446744065119617024_mask_ashr_32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: andl $-2, %eax +; X86-NEXT: sarl $31, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_ashr_32: ; X64: # %bb.0: @@ -1885,13 +1885,13 @@ define i64 @test_i64_18446744065119617024_mask_ashr_32(i64 %a0) { ret i64 %t1 } define i64 @test_i64_18446744065119617024_mask_ashr_33(i64 %a0) { -; X32-LABEL: test_i64_18446744065119617024_mask_ashr_33: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl %edx, %eax -; X32-NEXT: sarl %eax -; X32-NEXT: sarl $31, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_18446744065119617024_mask_ashr_33: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: sarl %eax +; X86-NEXT: sarl $31, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_ashr_33: ; X64: # %bb.0: @@ -1903,13 +1903,13 @@ define i64 @test_i64_18446744065119617024_mask_ashr_33(i64 %a0) { ret i64 %t1 } define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) { -; X32-LABEL: test_i64_18446744065119617024_mask_ashr_34: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl %edx, %eax -; X32-NEXT: sarl $2, %eax -; X32-NEXT: sarl $31, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_18446744065119617024_mask_ashr_34: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: sarl $2, %eax +; X86-NEXT: sarl $31, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_ashr_34: ; X64: # %bb.0: @@ -1924,12 +1924,12 @@ define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) { ; shl define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { -; X32-LABEL: test_i64_2147483647_mask_shl_1: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: addl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: retl +; X86-LABEL: test_i64_2147483647_mask_shl_1: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl %eax, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: retl ; ; X64-LABEL: test_i64_2147483647_mask_shl_1: ; X64: # %bb.0: @@ -1941,12 +1941,12 @@ define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { ret i64 %t1 } define i64 @test_i64_2147483647_mask_shl_32(i64 %a0) { -; X32-LABEL: test_i64_2147483647_mask_shl_32: -; X32: # %bb.0: -; X32-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_2147483647_mask_shl_32: +; X86: # %bb.0: +; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_2147483647_mask_shl_32: ; X64: # %bb.0: @@ -1959,12 +1959,12 @@ define i64 @test_i64_2147483647_mask_shl_32(i64 %a0) { ret i64 %t1 } define i64 @test_i64_2147483647_mask_shl_33(i64 %a0) { -; X32-LABEL: test_i64_2147483647_mask_shl_33: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: addl %edx, %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_2147483647_mask_shl_33: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_2147483647_mask_shl_33: ; X64: # %bb.0: @@ -1976,12 +1976,12 @@ define i64 @test_i64_2147483647_mask_shl_33(i64 %a0) { ret i64 %t1 } define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) { -; X32-LABEL: test_i64_2147483647_mask_shl_34: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: shll $2, %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_2147483647_mask_shl_34: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shll $2, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_2147483647_mask_shl_34: ; X64: # %bb.0: @@ -1994,16 +1994,16 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) { } define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_shl_15: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shll $16, %ecx -; X32-NEXT: movl $32767, %edx # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %edx -; X32-NEXT: shldl $15, %ecx, %edx -; X32-NEXT: shll $31, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_shl_15: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $16, %ecx +; X86-NEXT: movl $32767, %edx # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shldl $15, %ecx, %edx +; X86-NEXT: shll $31, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_shl_15: ; X64: # %bb.0: @@ -2016,15 +2016,15 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { ret i64 %t1 } define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_shl_16: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $16, %eax -; X32-NEXT: movl $32767, %edx # imm = 0x7FFF -; X32-NEXT: andl {{[0-9]+}}(%esp), %edx -; X32-NEXT: shldl $16, %eax, %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_shl_16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $16, %eax +; X86-NEXT: movl $32767, %edx # imm = 0x7FFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shldl $16, %eax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_shl_16: ; X64: # %bb.0: @@ -2037,14 +2037,14 @@ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { ret i64 %t1 } define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_shl_17: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $16, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: shldl $17, %eax, %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_shl_17: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $16, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shldl $17, %eax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_shl_17: ; X64: # %bb.0: @@ -2057,14 +2057,14 @@ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { ret i64 %t1 } define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { -; X32-LABEL: test_i64_140737488289792_mask_shl_18: -; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shll $16, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: shldl $18, %eax, %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_140737488289792_mask_shl_18: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $16, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shldl $18, %eax, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_shl_18: ; X64: # %bb.0: @@ -2078,13 +2078,13 @@ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { } define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) { -; X32-LABEL: test_i64_18446744065119617024_mask_shl_1: -; X32: # %bb.0: -; X32-NEXT: movl $2147483646, %edx # imm = 0x7FFFFFFE -; X32-NEXT: andl {{[0-9]+}}(%esp), %edx -; X32-NEXT: addl %edx, %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: retl +; X86-LABEL: test_i64_18446744065119617024_mask_shl_1: +; X86: # %bb.0: +; X86-NEXT: movl $2147483646, %edx # imm = 0x7FFFFFFE +; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_shl_1: ; X64: # %bb.0: diff --git a/llvm/test/CodeGen/X86/float-conv-elim.ll b/llvm/test/CodeGen/X86/float-conv-elim.ll index 4a651cf..2a543df 100644 --- a/llvm/test/CodeGen/X86/float-conv-elim.ll +++ b/llvm/test/CodeGen/X86/float-conv-elim.ll @@ -1,32 +1,137 @@ -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64 < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-- -mcpu=x86-64 < %s | FileCheck %s -; Make sure the float conversion is folded away as it should be. -; CHECK-LABEL: foo -; CHECK-NOT: cvt -; CHECK: movzbl -define i32 @foo(i8 %a) #0 { +define i32 @u8_f32_s32(i8 %a) { +; CHECK-LABEL: u8_f32_s32: +; CHECK: # %bb.0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: retq %conv = uitofp i8 %a to float %conv1 = fptosi float %conv to i32 ret i32 %conv1 } -; CHECK-LABEL: foo2 -; CHECK-NOT: cvt -; CHECK: movsbl -define i32 @foo2(i8 %a) #0 { +define i32 @s8_f32_s32(i8 %a) { +; CHECK-LABEL: s8_f32_s32: +; CHECK: # %bb.0: +; CHECK-NEXT: movsbl %dil, %eax +; CHECK-NEXT: retq %conv = sitofp i8 %a to float %conv1 = fptosi float %conv to i32 ret i32 %conv1 } -; CHECK-LABEL: bar -; CHECK-NOT: cvt -; CHECK: movl -define zeroext i8 @bar(i8 zeroext %a) #0 { +define zeroext i8 @u8_f32_u8(i8 zeroext %a) { +; CHECK-LABEL: u8_f32_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq %conv = uitofp i8 %a to float %conv1 = fptoui float %conv to i8 ret i8 %conv1 } -attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +define i32 @s32_f32_s24_s32(i32 %a) { +; CHECK-LABEL: s32_f32_s24_s32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll $8, %eax +; CHECK-NEXT: sarl $8, %eax +; CHECK-NEXT: retq + %f = sitofp i32 %a to float + %i = fptosi float %f to i24 + %r = sext i24 %i to i32 + ret i32 %r +} + +define i32 @s32_f32_u24_u32(i32 %a) { +; CHECK-LABEL: s32_f32_u24_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $16777215, %eax # imm = 0xFFFFFF +; CHECK-NEXT: retq + %f = sitofp i32 %a to float + %i = fptoui float %f to i24 + %r = zext i24 %i to i32 + ret i32 %r +} + +define i32 @u32_f32_s24_s32(i32 %a) { +; CHECK-LABEL: u32_f32_s24_s32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll $8, %eax +; CHECK-NEXT: sarl $8, %eax +; CHECK-NEXT: retq + %f = uitofp i32 %a to float + %i = fptosi float %f to i24 + %r = sext i24 %i to i32 + ret i32 %r +} + +define i32 @u32_f32_u24_u32(i32 %a) { +; CHECK-LABEL: u32_f32_u24_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $16777215, %eax # imm = 0xFFFFFF +; CHECK-NEXT: retq + %f = uitofp i32 %a to float + %i = fptoui float %f to i24 + %r = zext i24 %i to i32 + ret i32 %r +} + +; This requires converting to FP and back. +define i32 @s32_f32_s25_s32(i32 %a) { +; CHECK-LABEL: s32_f32_s25_s32: +; CHECK: # %bb.0: +; CHECK-NEXT: cvtsi2ss %edi, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: retq + %f = sitofp i32 %a to float + %i = fptosi float %f to i25 + %r = sext i25 %i to i32 + ret i32 %r +} + +define i32 @s32_f32_u25_u32(i32 %a) { +; CHECK-LABEL: s32_f32_u25_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: cvtsi2ss %edi, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: retq + %f = sitofp i32 %a to float + %i = fptoui float %f to i25 + %r = zext i25 %i to i32 + ret i32 %r +} + +; TODO: This could avoid converting to FP. + +define i32 @u32_f32_s25_s32(i32 %a) { +; CHECK-LABEL: u32_f32_s25_s32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cvtsi2ss %rax, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: retq + %f = uitofp i32 %a to float + %i = fptosi float %f to i25 + %r = sext i25 %i to i32 + ret i32 %r +} + +define i32 @u32_f32_u25_u32(i32 %a) { +; CHECK-LABEL: u32_f32_u25_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cvtsi2ss %rax, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: retq + %f = uitofp i32 %a to float + %i = fptoui float %f to i25 + %r = zext i25 %i to i32 + ret i32 %r +} diff --git a/llvm/test/CodeGen/X86/load-local-v3i129.ll b/llvm/test/CodeGen/X86/load-local-v3i129.ll index b88e7a0..f339891 100644 --- a/llvm/test/CodeGen/X86/load-local-v3i129.ll +++ b/llvm/test/CodeGen/X86/load-local-v3i129.ll @@ -1,20 +1,39 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=FAST-SHLD +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefix=SLOW-SHLD define void @_start() { -; CHECK-LABEL: _start: -; CHECK: # %bb.0: # %Entry -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx -; CHECK-NEXT: shrdq $2, %rcx, %rax -; CHECK-NEXT: shrq $2, %rcx -; CHECK-NEXT: leaq 1(,%rax,4), %rdx -; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: shrdq $62, %rcx, %rax -; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: orq $-2, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq $-1, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: retq +; FAST-SHLD-LABEL: _start: +; FAST-SHLD: # %bb.0: # %Entry +; FAST-SHLD-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; FAST-SHLD-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; FAST-SHLD-NEXT: shrdq $2, %rcx, %rax +; FAST-SHLD-NEXT: shrq $2, %rcx +; FAST-SHLD-NEXT: leaq 1(,%rax,4), %rdx +; FAST-SHLD-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; FAST-SHLD-NEXT: shrdq $62, %rcx, %rax +; FAST-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; FAST-SHLD-NEXT: orq $-2, -{{[0-9]+}}(%rsp) +; FAST-SHLD-NEXT: movq $-1, -{{[0-9]+}}(%rsp) +; FAST-SHLD-NEXT: retq +; +; SLOW-SHLD-LABEL: _start: +; SLOW-SHLD: # %bb.0: # %Entry +; SLOW-SHLD-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; SLOW-SHLD-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; SLOW-SHLD-NEXT: shrq $2, %rax +; SLOW-SHLD-NEXT: movq %rcx, %rdx +; SLOW-SHLD-NEXT: shlq $62, %rdx +; SLOW-SHLD-NEXT: orq %rax, %rdx +; SLOW-SHLD-NEXT: andq $-4, %rcx +; SLOW-SHLD-NEXT: leaq 1(,%rdx,4), %rax +; SLOW-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; SLOW-SHLD-NEXT: shrq $62, %rdx +; SLOW-SHLD-NEXT: orq %rcx, %rdx +; SLOW-SHLD-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; SLOW-SHLD-NEXT: orq $-2, -{{[0-9]+}}(%rsp) +; SLOW-SHLD-NEXT: movq $-1, -{{[0-9]+}}(%rsp) +; SLOW-SHLD-NEXT: retq Entry: %y = alloca <3 x i129>, align 4 %L = load <3 x i129>, <3 x i129>* %y diff --git a/llvm/test/CodeGen/X86/lsr-interesting-step.ll b/llvm/test/CodeGen/X86/lsr-interesting-step.ll index c9096156..66afd38 100644 --- a/llvm/test/CodeGen/X86/lsr-interesting-step.ll +++ b/llvm/test/CodeGen/X86/lsr-interesting-step.ll @@ -1,17 +1,35 @@ -; RUN: llc < %s -relocation-model=static -mtriple=x86_64-unknown-linux-gnu -asm-verbose=0 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -relocation-model=static -mtriple=x86_64-unknown-linux-gnu | FileCheck %s ; The inner loop should require only one add (and no leas either). ; rdar://8100380 -; CHECK: BB0_2: -; CHECK-NEXT: movb $0, flags(%rcx) -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: cmpq $8192, %rcx -; CHECK-NEXT: jl - @flags = external dso_local global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1] define void @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $2, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %bb7 +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_2 Depth 2 +; CHECK-NEXT: movl $2, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %bb11 +; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movb $0, flags(%rcx) +; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: cmpq $8192, %rcx # imm = 0x2000 +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: # %bb.3: # %bb16 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: incq %rax +; CHECK-NEXT: cmpq $8192, %rax # imm = 0x2000 +; CHECK-NEXT: jl .LBB0_1 +; CHECK-NEXT: # %bb.4: # %bb20 +; CHECK-NEXT: retq entry: br label %bb diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index 779f9dc..5a7f572 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -4539,6 +4539,8 @@ define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) { ret i1 %val } +; TODO: We expect similar result as for PR39665_c_ray_opt, +; but this is not the case in practice. define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) { ; SSE-LABEL: PR39665_c_ray: ; SSE: # %bb.0: @@ -4597,3 +4599,55 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) { %r = select i1 %u, i32 42, i32 99 ret i32 %r } + +define i32 @PR39665_c_ray_opt(<2 x double> %x, <2 x double> %y) { +; SSE-LABEL: PR39665_c_ray_opt: +; SSE: # %bb.0: +; SSE-NEXT: cmpltpd %xmm0, %xmm1 +; SSE-NEXT: movmskpd %xmm1, %eax +; SSE-NEXT: cmpb $3, %al +; SSE-NEXT: movl $42, %ecx +; SSE-NEXT: movl $99, %eax +; SSE-NEXT: cmovel %ecx, %eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: PR39665_c_ray_opt: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: cmpb $3, %al +; AVX1OR2-NEXT: movl $42, %ecx +; AVX1OR2-NEXT: movl $99, %eax +; AVX1OR2-NEXT: cmovel %ecx, %eax +; AVX1OR2-NEXT: retq +; +; KNL-LABEL: PR39665_c_ray_opt: +; KNL: # %bb.0: +; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; KNL-NEXT: knotw %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: testb $3, %al +; KNL-NEXT: movl $42, %ecx +; KNL-NEXT: movl $99, %eax +; KNL-NEXT: cmovel %ecx, %eax +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq +; +; SKX-LABEL: PR39665_c_ray_opt: +; SKX: # %bb.0: +; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: cmpb $3, %al +; SKX-NEXT: movl $42, %ecx +; SKX-NEXT: movl $99, %eax +; SKX-NEXT: cmovel %ecx, %eax +; SKX-NEXT: retq + %cmp = fcmp ogt <2 x double> %x, %y + %shift = shufflevector <2 x i1> %cmp, <2 x i1> poison, <2 x i32> <i32 1, i32 undef> + %1 = and <2 x i1> %cmp, %shift + %u = extractelement <2 x i1> %1, i64 0 + %r = select i1 %u, i32 42, i32 99 + ret i32 %r +} diff --git a/llvm/test/CodeGen/X86/sink-out-of-loop.ll b/llvm/test/CodeGen/X86/sink-out-of-loop.ll index e7b721d..e3443b9 100644 --- a/llvm/test/CodeGen/X86/sink-out-of-loop.ll +++ b/llvm/test/CodeGen/X86/sink-out-of-loop.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s ; A MOV32ri is inside a loop, it has two successors, one successor is inside the @@ -5,15 +6,48 @@ ; MOV32ri outside the loop. ; rdar://11980766 define i32 @sink_succ(i32 %argc, i8** nocapture %argv) nounwind uwtable ssp { -; CHECK-LABEL: sink_succ -; CHECK: [[OUTER_LN1:LBB0_[0-9]+]]: ## %preheader -; CHECK: %exit -; CHECK-NOT: movl -; CHECK: jne [[OUTER_LN1]] -; CHECK: movl -; CHECK: [[LN2:LBB0_[0-9]+]]: ## %for.body2 -; CHECK: jne [[LN2]] -; CHECK: ret +; CHECK-LABEL: sink_succ: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_1: ## %preheader +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB0_2 Depth 2 +; CHECK-NEXT: ## Child Loop BB0_3 Depth 3 +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_2: ## %for.body1.lr +; CHECK-NEXT: ## Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: ## => This Loop Header: Depth=2 +; CHECK-NEXT: ## Child Loop BB0_3 Depth 3 +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_3: ## %for.body1 +; CHECK-NEXT: ## Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: ## Parent Loop BB0_2 Depth=2 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=3 +; CHECK-NEXT: decl %edx +; CHECK-NEXT: jne LBB0_3 +; CHECK-NEXT: ## %bb.4: ## %for.inc40.i +; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=2 +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: cmpl $32, %ecx +; CHECK-NEXT: jne LBB0_2 +; CHECK-NEXT: ## %bb.5: ## %exit +; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: incl %eax +; CHECK-NEXT: cmpl $10, %eax +; CHECK-NEXT: jne LBB0_1 +; CHECK-NEXT: ## %bb.6: ## %for.body2.preheader +; CHECK-NEXT: movl $2048, %eax ## imm = 0x800 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_7: ## %for.body2 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: decl %eax +; CHECK-NEXT: jne LBB0_7 +; CHECK-NEXT: ## %bb.8: ## %for.end20 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq entry: br label %preheader @@ -55,6 +89,19 @@ for.end20: define i32 @sink_out_of_loop(i32 %n, i32* %output) { ; CHECK-LABEL: sink_out_of_loop: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_1: ## %loop +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: movl %ecx, (%rsi,%rcx,4) +; CHECK-NEXT: incq %rcx +; CHECK-NEXT: cmpl %edi, %ecx +; CHECK-NEXT: jl LBB1_1 +; CHECK-NEXT: ## %bb.2: ## %exit +; CHECK-NEXT: imull %eax, %eax +; CHECK-NEXT: retq entry: br label %loop @@ -68,8 +115,5 @@ loop: br i1 %exit_cond, label %exit, label %loop exit: -; CHECK: %bb.2 -; CHECK: imull %eax, %eax -; CHECK: retq ret i32 %j } diff --git a/llvm/test/DebugInfo/X86/dbg-value-funcarg.ll b/llvm/test/DebugInfo/X86/dbg-value-funcarg.ll index 4818fbf..f1edf09 100644 --- a/llvm/test/DebugInfo/X86/dbg-value-funcarg.ll +++ b/llvm/test/DebugInfo/X86/dbg-value-funcarg.ll @@ -122,14 +122,13 @@ define dso_local void @foo_same_param(i32 %t3a) local_unnamed_addr #0 !dbg !31 { ; CHECK: DBG_VALUE %0, $noreg, ![[T3A]], !DIExpression(), ; CHECK: TCRETURNdi64 @bar, ; INSTRREF-LABEL: name: foo_same_param -; INSTRREF: DBG_PHI $edi, 2 ; INSTRREF: DBG_PHI $edi, 1 ; INSTRREF: DBG_VALUE $edi, $noreg, ![[T3A]], !DIExpression(), ; INSTRREF: CALL64pcrel32 @bar, ; INSTRREF: DBG_INSTR_REF 1, 0, ![[TMP]], !DIExpression(), ; INSTRREF: DBG_VALUE 123, $noreg, ![[T3A]], !DIExpression(), ; INSTRREF: CALL64pcrel32 @bar, -; INSTRREF: DBG_INSTR_REF 2, 0, ![[T3A]], !DIExpression(), +; INSTRREF: DBG_INSTR_REF 1, 0, ![[T3A]], !DIExpression(), ; INSTRREF: TCRETURNdi64 @bar, entry: call void @llvm.dbg.value(metadata i32 %t3a, metadata !33, metadata !DIExpression()), !dbg !35 diff --git a/llvm/test/DebugInfo/X86/dbg-value-funcarg4.ll b/llvm/test/DebugInfo/X86/dbg-value-funcarg4.ll new file mode 100644 index 0000000..df52a39 --- /dev/null +++ b/llvm/test/DebugInfo/X86/dbg-value-funcarg4.ll @@ -0,0 +1,51 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before=finalize-isel -o - %s -experimental-debug-variable-locations=true | FileCheck %s --implicit-check-not=DBG_PHI + +; Test that for multiple dbg.values referring to the same argument, we emit a +; single DBG_PHI and refer to it twice. (Using more than one DBG_PHI is fine, +; but inefficient). + +; CHECK-DAG: ![[LOCAL:.*]] = !DILocalVariable(name: "local" +; CHECK-DAG: ![[LOCAL2:.*]] = !DILocalVariable(name: "local2" + +; CHECK: DBG_PHI $edi, 1 + +; CHECK: DBG_INSTR_REF 1, 0, ![[LOCAL]], !DIExpression(), +; CHECK: DBG_INSTR_REF 1, 0, ![[LOCAL2]], !DIExpression(), + +declare void @bar(i32) +declare void @llvm.dbg.value(metadata, metadata, metadata) + +define dso_local void @foo_local(i32 %t1a) local_unnamed_addr !dbg !7 { +entry: + tail call void @bar(i32 %t1a) #3, !dbg !17 + %bees = add i32 %t1a, 3 + call void @llvm.dbg.value(metadata i32 %t1a, metadata !13, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.value(metadata i32 %t1a, metadata !19, metadata !DIExpression()), !dbg !14 + tail call void @bar(i32 %bees) #3, !dbg !17 + ret void, !dbg !18 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "foo.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang"} +!7 = distinct !DISubprogram(name: "foo_local", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{!12, !13} +!12 = !DILocalVariable(name: "t1a", arg: 1, scope: !7, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "local", scope: !7, file: !1, line: 4, type: !10) +!14 = !DILocation(line: 3, column: 20, scope: !7) +!15 = !DILocation(line: 4, column: 7, scope: !7) +!16 = !DILocation(line: 5, column: 3, scope: !7) +!17 = !DILocation(line: 7, column: 3, scope: !7) +!18 = !DILocation(line: 8, column: 1, scope: !7) +!19 = !DILocalVariable(name: "local2", scope: !7, file: !1, line: 4, type: !10) diff --git a/llvm/test/DebugInfo/X86/instr-ref-selectiondag.ll b/llvm/test/DebugInfo/X86/instr-ref-selectiondag.ll index 465d496..34eab5e 100644 --- a/llvm/test/DebugInfo/X86/instr-ref-selectiondag.ll +++ b/llvm/test/DebugInfo/X86/instr-ref-selectiondag.ll @@ -237,12 +237,11 @@ shoes: ; FASTISEL-INSTRREF-LABEL: name: qux -; FASTISEL-INSTRREF: DBG_PHI $rdi, 2 -; FASTISEL-INSTRREF-NEXT: DBG_PHI $rdi, 1 +; FASTISEL-INSTRREF: DBG_PHI $rdi, 1 ; FASTISEL-INSTRREF: DBG_INSTR_REF 1, 0, ![[SOCKS]], !DIExpression(DW_OP_deref), ; FASTISEL-INSTRREF-LABEL: bb.1.lala: -; FASTISEL-INSTRREF: DBG_INSTR_REF 2, 0, ![[KNEES]], !DIExpression(DW_OP_deref), +; FASTISEL-INSTRREF: DBG_INSTR_REF 1, 0, ![[KNEES]], !DIExpression(DW_OP_deref), declare i64 @cheddar(i32 *%arg) define void @qux(i32* noalias sret(i32) %agg.result) !dbg !40 { diff --git a/llvm/test/Instrumentation/HeapProfiler/no-instrumentation.ll b/llvm/test/Instrumentation/HeapProfiler/no-instrumentation.ll new file mode 100644 index 0000000..c05c013 --- /dev/null +++ b/llvm/test/Instrumentation/HeapProfiler/no-instrumentation.ll @@ -0,0 +1,23 @@ +;; Test that we don't add any instrumentation code to functions without +;; interesting memory accesses. +; +; RUN: opt < %s -passes='function(memprof),module(memprof-module)' -S -debug 2>&1 | FileCheck %s + +;; Require asserts for -debug +; REQUIRES: asserts + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @_Z3foov() { +entry: + ret void +} + +;; Confirm we ran memprof and decided not to instrument +; CHECK: MEMPROF done instrumenting: 0 define void @_Z3foov + +;; We should not add any instrumentation related code +; CHECK: define void @_Z3foov +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void diff --git a/llvm/test/Instrumentation/HeapProfiler/skip-compiler-inserted.ll b/llvm/test/Instrumentation/HeapProfiler/skip-compiler-inserted.ll new file mode 100644 index 0000000..716974d --- /dev/null +++ b/llvm/test/Instrumentation/HeapProfiler/skip-compiler-inserted.ll @@ -0,0 +1,47 @@ +;; Test that we don't instrument loads to PGO counters or other +;; compiler inserted variables. +; +; RUN: opt < %s -passes='function(memprof),module(memprof-module)' -S | FileCheck --check-prefixes=CHECK %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +$__profc__Z3foov = comdat nodeduplicate +@__profc__Z3foov = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 +@__llvm_gcov_ctr = internal global [1 x i64] zeroinitializer + +define void @_Z3foov(i32* %a) { +entry: + ;; Load that should get instrumentation. + %tmp1 = load i32, i32* %a, align 4 + ;; PGO counter update + %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3foov, i64 0, i64 0), align 8 + %0 = add i64 %pgocount, 1 + store i64 %0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3foov, i64 0, i64 0), align 8 + ;; Gcov counter update + %gcovcount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), align 8 + %1 = add i64 %gcovcount, 1 + store i64 %1, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0), align 8 + ret void +} + +;; We should only add memory profile instrumentation for the first load. +; CHECK: define void @_Z3foov +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = load i64, i64* @__memprof_shadow_memory_dynamic_address, align 8 +; CHECK-NEXT: %1 = ptrtoint i32* %a to i64 +; CHECK-NEXT: %2 = and i64 %1, -64 +; CHECK-NEXT: %3 = lshr i64 %2, 3 +; CHECK-NEXT: %4 = add i64 %3, %0 +; CHECK-NEXT: %5 = inttoptr i64 %4 to i64* +; CHECK-NEXT: %6 = load i64, i64* %5, align 8 +; CHECK-NEXT: %7 = add i64 %6, 1 +; CHECK-NEXT: store i64 %7, i64* %5, align 8 +; CHECK-NEXT: %tmp1 = load i32, i32* %a, align 4 +; CHECK-NEXT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3foov, i64 0, i64 0) +; CHECK-NEXT: %8 = add i64 %pgocount, 1 +; CHECK-NEXT: store i64 %8, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3foov, i64 0, i64 0) +; CHECK-NEXT: %gcovcount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0) +; CHECK-NEXT: %9 = add i64 %gcovcount, 1 +; CHECK-NEXT: store i64 %9, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_gcov_ctr, i64 0, i64 0) +; CHECK-NEXT: ret void diff --git a/llvm/test/MC/AArch64/armv8.2a-dotprod.s b/llvm/test/MC/AArch64/armv8.2a-dotprod.s index c561fbc..45dc8d0 100644 --- a/llvm/test/MC/AArch64/armv8.2a-dotprod.s +++ b/llvm/test/MC/AArch64/armv8.2a-dotprod.s @@ -13,6 +13,7 @@ // RUN: llvm-mc -triple aarch64 -mcpu=tsv110 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-DOTPROD // RUN: llvm-mc -triple aarch64 -mcpu=cortex-r82 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-DOTPROD // RUN: llvm-mc -triple aarch64 -mattr=+v8r -show-encoding < %s | FileCheck %s --check-prefix=CHECK-DOTPROD +// RUN: llvm-mc -triple aarch64 -mcpu=ampere1 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-DOTPROD // RUN: not llvm-mc -triple aarch64 -mattr=+v8.2a -show-encoding < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s @@ -36,6 +37,8 @@ // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s // RUN: not llvm-mc -triple aarch64 -mcpu=neoverse-n2 -mattr=-dotprod -show-encoding < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s +// RUN: not llvm-mc -triple aarch64 -mcpu=ampere1 -mattr=-dotprod -show-encoding < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-NO-DOTPROD < %t %s udot v0.2s, v1.8b, v2.8b sdot v0.2s, v1.8b, v2.8b diff --git a/llvm/test/MC/AArch64/armv8.3a-rcpc.s b/llvm/test/MC/AArch64/armv8.3a-rcpc.s index 1660cc4..271eea5 100644 --- a/llvm/test/MC/AArch64/armv8.3a-rcpc.s +++ b/llvm/test/MC/AArch64/armv8.3a-rcpc.s @@ -6,6 +6,7 @@ // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mcpu=neoverse-e1 < %s 2>&1 | FileCheck %s // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mcpu=neoverse-n1 < %s 2>&1 | FileCheck %s // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mcpu=neoverse-n2 < %s 2>&1 | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mcpu=ampere1 < %s 2>&1 | FileCheck %s // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a -mattr=+rcpc < %s 2>&1 | FileCheck %s // RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-REQ %s < %t diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt b/llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt index c6d7c79..d7d246b 100644 --- a/llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt +++ b/llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt @@ -12,6 +12,7 @@ # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n2 --disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=ampere1 --disassemble < %s | FileCheck %s # CHECK: ldaprb w0, [x0] # CHECK: ldaprh w0, [x0] diff --git a/llvm/test/MC/SystemZ/insn-bad.s b/llvm/test/MC/SystemZ/insn-bad.s index dbe5b71..e616026 100644 --- a/llvm/test/MC/SystemZ/insn-bad.s +++ b/llvm/test/MC/SystemZ/insn-bad.s @@ -512,6 +512,8 @@ #CHECK: error: offset out of range #CHECK: brasl %r0, -0x1000000002 #CHECK: error: offset out of range +#CHECK: brasl %r0, .-0x1000000002 +#CHECK: error: offset out of range #CHECK: brasl %r0, -1 #CHECK: error: offset out of range #CHECK: brasl %r0, 1 @@ -520,6 +522,8 @@ #CHECK: error: offset out of range #CHECK: jasl %r0, -0x1000000002 #CHECK: error: offset out of range +#CHECK: jasl %r0, .-0x1000000002 +#CHECK: error: offset out of range #CHECK: jasl %r0, -1 #CHECK: error: offset out of range #CHECK: jasl %r0, 1 @@ -527,10 +531,12 @@ #CHECK: jasl %r0, 0x100000000 brasl %r0, -0x1000000002 + brasl %r0, .-0x1000000002 brasl %r0, -1 brasl %r0, 1 brasl %r0, 0x100000000 jasl %r0, -0x1000000002 + jasl %r0, .-0x1000000002 jasl %r0, -1 jasl %r0, 1 jasl %r0, 0x100000000 diff --git a/llvm/test/MC/SystemZ/insn-good.s b/llvm/test/MC/SystemZ/insn-good.s index ce610b5..e7f73c7 100644 --- a/llvm/test/MC/SystemZ/insn-good.s +++ b/llvm/test/MC/SystemZ/insn-good.s @@ -1263,6 +1263,12 @@ #CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL brasl %r0, -0x100000000 jasl %r0, -0x100000000 +#CHECK: brasl %r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x05,A,A,A,A] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL +#CHECK: brasl %r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x05,A,A,A,A] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL + brasl %r0, .-0x100000000 + jasl %r0, .-0x100000000 #CHECK: brasl %r0, .[[LAB:L.*]]-2 # encoding: [0xc0,0x05,A,A,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL #CHECK: brasl %r0, .[[LAB:L.*]]-2 # encoding: [0xc0,0x05,A,A,A,A] diff --git a/llvm/test/TableGen/VarLenDecoder.td b/llvm/test/TableGen/VarLenDecoder.td new file mode 100644 index 0000000..657ca6a --- /dev/null +++ b/llvm/test/TableGen/VarLenDecoder.td @@ -0,0 +1,87 @@ +// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s + +include "llvm/Target/Target.td" + +def ArchInstrInfo : InstrInfo { } + +def Arch : Target { + let InstructionSet = ArchInstrInfo; +} + +def Reg : Register<"reg">; + +def RegClass : RegisterClass<"foo", [i64], 0, (add Reg)>; + +def GR64 : RegisterOperand<RegClass>; + +class MyMemOperand<dag sub_ops> : Operand<iPTR> { + let MIOperandInfo = sub_ops; + dag Base; + dag Extension; +} + +def MemOp16: MyMemOperand<(ops GR64:$reg, i16imm:$offset)>; + +def MemOp32: MyMemOperand<(ops GR64:$reg, i32imm:$offset)>; + +class MyVarInst<MyMemOperand memory_op> : Instruction { + dag Inst; + + let OutOperandList = (outs GR64:$dst); + let InOperandList = (ins memory_op:$src); +} + +def FOO16 : MyVarInst<MemOp16> { + let Inst = (ascend + (descend (operand "$dst", 3), 0b01000, (operand "$src.reg", 3)), + (slice "$src.offset", 15, 0) + ); +} +def FOO32 : MyVarInst<MemOp32> { + let Inst = (ascend + (descend (operand "$dst", 3), 0b01001, (operand "$src.reg", 3)), + (slice "$src.offset", 31, 16), + (slice "$src.offset", 15, 0) + ); +} + +// CHECK: MCD::OPC_ExtractField, 3, 5, // Inst{7-3} ... +// CHECK-NEXT: MCD::OPC_FilterValue, 8, 4, 0, 0, // Skip to: 12 +// CHECK-NEXT: MCD::OPC_Decode, 244, 1, 0, // Opcode: FOO16 +// CHECK-NEXT: MCD::OPC_FilterValue, 9, 4, 0, 0, // Skip to: 21 +// CHECK-NEXT: MCD::OPC_Decode, 245, 1, 1, // Opcode: FOO32 +// CHECK-NEXT: MCD::OPC_Fail, + +// Instruction length table +// CHECK: 27, +// CHECK-NEXT: 43, +// CHECK-NEXT: }; + +// CHECK: case 0: +// CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3); +// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; } +// CHECK-NEXT: tmp = fieldFromInstruction(insn, 0, 3); +// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; } +// CHECK-NEXT: tmp = fieldFromInstruction(insn, 11, 16); +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: return S; +// CHECK-NEXT: case 1: +// CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3); +// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; } +// CHECK-NEXT: tmp = fieldFromInstruction(insn, 0, 3); +// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; } +// CHECK-NEXT: tmp = 0x0; +// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 11, 16), 16, 16); +// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 27, 16), 0, 16); +// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp)); +// CHECK-NEXT: return S; + +// CHECK-LABEL: case MCD::OPC_ExtractField: { +// CHECK: makeUp(insn, Start + Len); + +// CHECK-LABEL: case MCD::OPC_CheckField: { +// CHECK: makeUp(insn, Start + Len); + +// CHECK-LABEL: case MCD::OPC_Decode: { +// CHECK: Len = InstrLenTable[Opc]; +// CHECK-NEXT: makeUp(insn, Len); diff --git a/llvm/test/Transforms/ArgumentPromotion/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/min-legal-vector-width.ll new file mode 100644 index 0000000..e7a18929 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/min-legal-vector-width.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s + +; CHECK-LABEL: define i32 @foo() #0 { +; CHECK-NEXT: %.val = load <32 x half>, <32 x half>* undef, align 4 +; CHECK-NEXT: call void @bar(<32 x half> %.val) +; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: } + +; CHECK-LABEL: define internal void @bar(<32 x half> %.0.val) #0 { +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; CHECK: attributes #0 = { uwtable "min-legal-vector-width"="512" } + +define i32 @foo() #0 { + call void @bar(<32 x half>* undef) + ret i32 0 +} + +define internal void @bar(<32 x half>*) #0 { + %2 = load <32 x half>, <32 x half>* %0, align 4 + ret void +} + +attributes #0 = { uwtable "min-legal-vector-width"="0" } diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index 718ae4a..2b2b113 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -57,9 +57,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]] +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -84,9 +84,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]] ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -151,9 +151,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -178,9 +178,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -245,9 +245,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -272,9 +272,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -339,9 +339,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -366,9 +366,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -431,8 +431,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -457,8 +457,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]] +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -521,8 +521,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -547,8 +547,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]] +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -608,14 +608,14 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; ; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { +; IS__TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] { ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -635,14 +635,14 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; ; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; IS__CGSCC_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { +; IS__CGSCC_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] { ; IS__CGSCC_NPM-NEXT: bb: ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -671,7 +671,7 @@ define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_p ; ; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable ; IS________NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] { +; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] { ; IS________NPM-NEXT: bb: ; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 ; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 @@ -707,9 +707,9 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -734,9 +734,9 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -762,21 +762,37 @@ attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2 attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" } attributes #5 = { argmemonly nounwind } ;. -; IS__TUNIT____: attributes #[[ATTR0:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } -; IS__TUNIT____: attributes #[[ATTR1:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; IS__TUNIT____: attributes #[[ATTR2:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; IS__TUNIT____: attributes #[[ATTR3:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } -; IS__TUNIT____: attributes #[[ATTR4:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" } -; IS__TUNIT____: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } -; IS__TUNIT____: attributes #[[ATTR6:[0-9]+]] = { willreturn writeonly } -; IS__TUNIT____: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind willreturn } +; IS__TUNIT_OPM: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; IS__TUNIT_OPM: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; IS__TUNIT_OPM: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; IS__TUNIT_OPM: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; IS__TUNIT_OPM: attributes #[[ATTR4]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" } +; IS__TUNIT_OPM: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } +; IS__TUNIT_OPM: attributes #[[ATTR6]] = { willreturn writeonly } +; IS__TUNIT_OPM: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn } +;. +; IS__TUNIT_NPM: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; IS__TUNIT_NPM: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; IS__TUNIT_NPM: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; IS__TUNIT_NPM: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; IS__TUNIT_NPM: attributes #[[ATTR4:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } +; IS__TUNIT_NPM: attributes #[[ATTR5]] = { willreturn writeonly } +; IS__TUNIT_NPM: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn } +;. +; IS__CGSCC_OPM: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; IS__CGSCC_OPM: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; IS__CGSCC_OPM: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; IS__CGSCC_OPM: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; IS__CGSCC_OPM: attributes #[[ATTR4]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" } +; IS__CGSCC_OPM: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR6]] = { willreturn writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR7]] = { nounwind willreturn } ;. -; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } -; IS__CGSCC____: attributes #[[ATTR1:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; IS__CGSCC____: attributes #[[ATTR2:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; IS__CGSCC____: attributes #[[ATTR3:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } -; IS__CGSCC____: attributes #[[ATTR4:[0-9]+]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" } -; IS__CGSCC____: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } -; IS__CGSCC____: attributes #[[ATTR6:[0-9]+]] = { willreturn writeonly } -; IS__CGSCC____: attributes #[[ATTR7:[0-9]+]] = { nounwind willreturn } +; IS__CGSCC_NPM: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; IS__CGSCC_NPM: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; IS__CGSCC_NPM: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; IS__CGSCC_NPM: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; IS__CGSCC_NPM: attributes #[[ATTR4:[0-9]+]] = { argmemonly nofree nounwind willreturn writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR5]] = { willreturn writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/GlobalOpt/malloc-promote-6.ll b/llvm/test/Transforms/GlobalOpt/malloc-promote-6.ll new file mode 100644 index 0000000..f35e1ae --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/malloc-promote-6.ll @@ -0,0 +1,30 @@ +; RUN: opt -passes=globalopt -S < %s | FileCheck %s + +; CHECK-NOT: @global + +@global = internal global i8* null +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @zot, i8* null }] + +declare i8* @_Znwm(i64) + +define internal void @widget() { + %tmp = tail call i8* @_Znwm(i64 0) + %tmp2 = getelementptr inbounds i8, i8* %tmp, i64 0 + store i8* %tmp, i8** @global, align 8 + call void @baz(void ()* @spam) + ret void +} + +define internal void @spam() { + %tmp = load i8*, i8** @global, align 8 + %tmp2 = getelementptr inbounds i8, i8* %tmp, i64 0 + ret void +} + +define internal void @zot() { + call void @baz(void ()* @widget) + ret void +} + +declare void @baz(void ()*) + diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 950a8ce..4b8d944 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -3,7 +3,6 @@ ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -inferattrs -S | FileCheck --match-full-lines --check-prefixes=CHECK,CHECK-KNOWN,CHECK-NOLINUX,CHECK-OPEN,CHECK-DARWIN %s ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -inferattrs -S | FileCheck --match-full-lines --check-prefixes=CHECK,CHECK-KNOWN,CHECK-LINUX %s ; RUN: opt < %s -mtriple=nvptx -inferattrs -S | FileCheck --match-full-lines --check-prefixes=CHECK-NOLINUX,CHECK-NVPTX %s -; RUN: opt < %s -mtriple=s390x-linux-gnu -inferattrs -S | FileCheck --check-prefixes=CHECK-SYSTEMZ %s declare i32 @__nvvm_reflect(i8*) ; CHECK-NVPTX: declare noundef i32 @__nvvm_reflect(i8* noundef) [[NOFREE_NOUNWIND_READNONE:#[0-9]+]] @@ -220,7 +219,7 @@ declare x86_fp80 @acoshl(x86_fp80) ; CHECK: declare x86_fp80 @acosl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @acosl(x86_fp80) -; CHECK: declare noalias noundef i8* @aligned_alloc(i64 allocalign noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] +; CHECK: declare noalias noundef i8* @aligned_alloc(i64 allocalign noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE1_FAMILY_MALLOC:#[0-9]+]] declare i8* @aligned_alloc(i64, i64) ; CHECK: declare double @asin(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] @@ -290,7 +289,7 @@ declare void @bcopy(i8*, i8*, i64) ; CHECK: declare void @bzero(i8* nocapture writeonly, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] declare void @bzero(i8*, i64) -; CHECK: declare noalias noundef i8* @calloc(i64 noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01:#[0-9]+]] +; CHECK: declare noalias noundef i8* @calloc(i64 noundef, i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_MALLOC:#[0-9]+]] declare i8* @calloc(i64, i64) ; CHECK: declare double @cbrt(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] @@ -490,7 +489,7 @@ declare i32 @fputs(i8*, %opaque*) ; CHECK: declare noundef i64 @fread(i8* nocapture noundef, i64 noundef, i64 noundef, %opaque* nocapture noundef) [[NOFREE_NOUNWIND]] declare i64 @fread(i8*, i64, i64, %opaque*) -; CHECK: declare void @free(i8* nocapture noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN:#[0-9]+]] +; CHECK: declare void @free(i8* allocptr nocapture noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_MALLOC:#[0-9]+]] declare void @free(i8*) ; CHECK: declare double @frexp(double, i32* nocapture) [[NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] @@ -592,11 +591,9 @@ declare i64 @labs(i64) declare i32 @lchown(i8*, i32, i32) ; CHECK: declare double @ldexp(double, i32) [[NOFREE_WILLRETURN:#[0-9]+]] -; CHECK-SYSTEMZ: declare double @ldexp(double, i32 signext) declare double @ldexp(double, i32) ; CHECK: declare float @ldexpf(float, i32) [[NOFREE_WILLRETURN]] -; CHECK-SYSTEMZ: declare float @ldexpf(float, i32 signext) declare float @ldexpf(float, i32) ; CHECK: declare x86_fp80 @ldexpl(x86_fp80, i32) [[NOFREE_WILLRETURN]] @@ -656,7 +653,7 @@ declare i32 @lstat(i8*, %opaque*) ; CHECK-LINUX: declare noundef i32 @lstat64(i8* nocapture noundef readonly, %opaque* nocapture noundef) [[NOFREE_NOUNWIND]] declare i32 @lstat64(i8*, %opaque*) -; CHECK: declare noalias noundef i8* @malloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE:#[0-9]+]] +; CHECK: declare noalias noundef i8* @malloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_MALLOC:#[0-9]+]] declare i8* @malloc(i64) ; CHECK-LINUX: declare noalias noundef i8* @memalign(i64 allocalign, i64) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] @@ -756,12 +753,10 @@ declare i32 @printf(i8*, ...) declare i32 @putc(i32, %opaque*) ; CHECK: declare noundef i32 @putchar(i32 noundef) [[NOFREE_NOUNWIND]] -; CHECK-SYSTEMZ: declare noundef i32 @putchar(i32 noundef signext) declare i32 @putchar(i32) ; CHECK-KNOWN: declare noundef i32 @putchar_unlocked(i32 noundef) [[NOFREE_NOUNWIND]] ; CHECK-UNKNOWN: declare i32 @putchar_unlocked(i32){{$}} -; CHECK-SYSTEMZ: declare noundef i32 @putchar_unlocked(i32 noundef signext) declare i32 @putchar_unlocked(i32) ; CHECK: declare noundef i32 @puts(i8* nocapture noundef readonly) [[NOFREE_NOUNWIND]] @@ -779,10 +774,10 @@ declare i64 @read(i32, i8*, i64) ; CHECK: declare noundef i64 @readlink(i8* nocapture noundef readonly, i8* nocapture noundef, i64 noundef) [[NOFREE_NOUNWIND]] declare i64 @readlink(i8*, i8*, i64) -; CHECK: declare noalias noundef i8* @realloc(i8* nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE:#[0-9]+]] +; CHECK: declare noalias noundef i8* @realloc(i8* allocptr nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_MALLOC:#[0-9]+]] declare i8* @realloc(i8*, i64) -; CHECK: declare noalias noundef i8* @reallocf(i8* nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE]] +; CHECK: declare noalias noundef i8* @reallocf(i8* allocptr nocapture, i64 noundef) [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_MALLOC]] declare i8* @reallocf(i8*, i64) ; CHECK: declare noundef i8* @realpath(i8* nocapture noundef readonly, i8* noundef) [[NOFREE_NOUNWIND]] @@ -905,7 +900,7 @@ declare i8* @strcpy(i8*, i8*) ; CHECK: declare i64 @strcspn(i8* nocapture, i8* nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY]] declare i64 @strcspn(i8*, i8*) -; CHECK: declare noalias i8* @strdup(i8* nocapture readonly) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]] +; CHECK: declare noalias i8* @strdup(i8* nocapture readonly) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC:#[0-9]+]] declare i8* @strdup(i8*) ; CHECK: declare i64 @strlen(i8* nocapture) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY]] @@ -923,7 +918,7 @@ declare i32 @strncmp(i8*, i8*, i64) ; CHECK: declare i8* @strncpy(i8* noalias returned writeonly, i8* noalias nocapture readonly, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] declare i8* @strncpy(i8*, i8*, i64) -; CHECK: declare noalias i8* @strndup(i8* nocapture readonly, i64 noundef) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN]] +; CHECK: declare noalias i8* @strndup(i8* nocapture readonly, i64 noundef) [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] declare i8* @strndup(i8*, i64) ; CHECK: declare i64 @strnlen(i8* nocapture, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]] @@ -1031,7 +1026,7 @@ declare i32 @utime(i8*, %opaque*) ; CHECK: declare noundef i32 @utimes(i8* nocapture noundef readonly, %opaque* nocapture noundef readonly) [[NOFREE_NOUNWIND]] declare i32 @utimes(i8*, %opaque*) -; CHECK: declare noalias noundef i8* @valloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE]] +; CHECK: declare noalias noundef i8* @valloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_MALLOC]] declare i8* @valloc(i64) ; CHECK: declare noundef i32 @vfprintf(%opaque* nocapture noundef, i8* nocapture noundef readonly, %opaque* noundef) [[NOFREE_NOUNWIND]] @@ -1070,16 +1065,18 @@ declare void @memset_pattern16(i8*, i8*, i64) ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { mustprogress nofree nounwind willreturn } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] = { mustprogress nofree nounwind willreturn writeonly } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND]] = { nofree nounwind } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0) } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0,1) } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE1_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(1) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0,1) "alloc-family"="malloc" } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress nofree nounwind readonly willreturn } ; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { argmemonly mustprogress nofree nounwind willreturn } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY]] = { nofree nounwind readonly } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allocsize(1) } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn "alloc-family"="malloc" } ; CHECK-DAG: attributes [[NOFREE_WILLRETURN]] = { mustprogress nofree willreturn } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0) "alloc-family"="malloc" } ; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly mustprogress nofree nounwind readonly willreturn } ; CHECK-DAG: attributes [[NOFREE]] = { nofree } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN]] = { inaccessiblemem_or_argmemonly mustprogress nofree nounwind willreturn } ; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND]] = { argmemonly nofree nounwind } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allocsize(1) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nofree nounwind willreturn "alloc-family"="malloc" } + ; CHECK-NVPTX-DAG: attributes [[NOFREE_NOUNWIND_READNONE]] = { nofree nosync nounwind readnone } diff --git a/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll b/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll new file mode 100644 index 0000000..dbd9204 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll @@ -0,0 +1,98 @@ +; RUN: opt < %s -passes=instcombine -S -mtriple=systemz-unknown | FileCheck %s +; +; Check that i32 arguments to generated libcalls have the proper extension +; attributes. + + +declare double @exp2(double) +declare float @exp2f(float) +declare fp128 @exp2l(fp128) + +define double @fun1(i32 %x) { +; CHECK-LABEL: @fun1 +; CHECK: call double @ldexp + %conv = sitofp i32 %x to double + %ret = call double @exp2(double %conv) + ret double %ret +} + +define float @fun2(i32 %x) { +; CHECK-LABEL: @fun2 +; CHECK: call float @ldexpf + %conv = sitofp i32 %x to float + %ret = call float @exp2f(float %conv) + ret float %ret +} + +define fp128 @fun3(i8 zeroext %x) { +; CHECK-LABEL: @fun3 +; CHECK: call fp128 @ldexpl + %conv = uitofp i8 %x to fp128 + %ret = call fp128 @exp2l(fp128 %conv) + ret fp128 %ret +} + +@a = common global [60 x i8] zeroinitializer, align 1 +@b = common global [60 x i8] zeroinitializer, align 1 +declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64) +define i8* @fun4() { +; CHECK-LABEL: @fun4 +; CHECK: call i8* @memccpy + %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 + %ret = call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1) + ret i8* %ret +} + +%FILE = type { } +@A = constant [2 x i8] c"A\00" +declare i32 @fputs(i8*, %FILE*) +define void @fun5(%FILE* %fp) { +; CHECK-LABEL: @fun5 +; CHECK: call i32 @fputc + %str = getelementptr [2 x i8], [2 x i8]* @A, i32 0, i32 0 + call i32 @fputs(i8* %str, %FILE* %fp) + ret void +} + +@empty = constant [1 x i8] zeroinitializer +declare i32 @puts(i8*) +define void @fun6() { +; CHECK-LABEL: @fun6 +; CHECK: call i32 @putchar + %str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0 + call i32 @puts(i8* %str) + ret void +} + +@.str1 = private constant [2 x i8] c"a\00" +declare i8* @strstr(i8*, i8*) +define i8* @fun7(i8* %str) { +; CHECK-LABEL: @fun7 +; CHECK: call i8* @strchr + %pat = getelementptr inbounds [2 x i8], [2 x i8]* @.str1, i32 0, i32 0 + %ret = call i8* @strstr(i8* %str, i8* %pat) + ret i8* %ret +} + +; CHECK: declare i8* @strchr(i8*, i32 signext) + +@hello = constant [14 x i8] c"hello world\5Cn\00" +@chp = global i8* zeroinitializer +declare i8* @strchr(i8*, i32) +define void @fun8(i32 %chr) { +; CHECK-LABEL: @fun8 +; CHECK: call i8* @memchr + %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0 + %dst = call i8* @strchr(i8* %src, i32 %chr) + store i8* %dst, i8** @chp + ret void +} + +; CHECK: declare double @ldexp(double, i32 signext) +; CHECK: declare float @ldexpf(float, i32 signext) +; CHECK: declare fp128 @ldexpl(fp128, i32 signext) +; CHECK: declare i8* @memccpy(i8* noalias writeonly, i8* noalias nocapture readonly, i32 signext, i64) +; CHECK: declare noundef i32 @fputc(i32 noundef signext, %FILE* nocapture noundef) +; CHECK: declare noundef i32 @putchar(i32 noundef signext) +; CHECK: declare i8* @memchr(i8*, i32 signext, i64) diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index 8e5d6f2..9fc7909 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -1270,8 +1270,8 @@ define <2 x i8> @ashr_add_sexts(<2 x i1> %x, <2 x i1> %y) { define i32 @cmp_math_sexts(i32 %x, i32 %y) { ; CHECK-LABEL: @cmp_math_sexts( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %gt = icmp ugt i32 %x, %y @@ -1392,3 +1392,307 @@ define i8 @add_like_or_t2_extrause(i8 %x) { %r = add i8 %i1, 42 ret i8 %r } + +define i8 @add_and_xor(i8 %x, i8 %y) { +; CHECK-LABEL: @add_and_xor( +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %xor = xor i8 %x, -1 + %and = and i8 %xor, %y + %add = add i8 %and, %x + ret i8 %add +} + +define i8 @add_and_xor_wrong_const(i8 %x, i8 %y) { +; CHECK-LABEL: @add_and_xor_wrong_const( +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[X:%.*]], -2 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[AND]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %xor = xor i8 %x, -2 + %and = and i8 %xor, %y + %add = add i8 %and, %x + ret i8 %add +} + +define i8 @add_and_xor_wrong_op(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @add_and_xor_wrong_op( +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[Z:%.*]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[AND]], [[X:%.*]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %xor = xor i8 %z, -1 + %and = and i8 %xor, %y + %add = add i8 %and, %x + ret i8 %add +} + +define i8 @add_and_xor_commuted1(i8 %x, i8 %_y) { +; CHECK-LABEL: @add_and_xor_commuted1( +; CHECK-NEXT: [[Y:%.*]] = udiv i8 42, [[_Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[Y]], [[X:%.*]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %y = udiv i8 42, %_y ; thwart complexity-based canonicalization + %xor = xor i8 %x, -1 + %and = and i8 %y, %xor + %add = add i8 %and, %x + ret i8 %add +} + +define i8 @add_and_xor_commuted2(i8 %_x, i8 %y) { +; CHECK-LABEL: @add_and_xor_commuted2( +; CHECK-NEXT: [[X:%.*]] = udiv i8 42, [[_X:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %x = udiv i8 42, %_x ; thwart complexity-based canonicalization + %xor = xor i8 %x, -1 + %and = and i8 %xor, %y + %add = add i8 %x, %and + ret i8 %add +} + +define i8 @add_and_xor_commuted3(i8 %_x, i8 %_y) { +; CHECK-LABEL: @add_and_xor_commuted3( +; CHECK-NEXT: [[X:%.*]] = udiv i8 42, [[_X:%.*]] +; CHECK-NEXT: [[Y:%.*]] = udiv i8 42, [[_Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[X]], [[Y]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %x = udiv i8 42, %_x ; thwart complexity-based canonicalization + %y = udiv i8 42, %_y ; thwart complexity-based canonicalization + %xor = xor i8 %x, -1 + %and = and i8 %y, %xor + %add = add i8 %x, %and + ret i8 %add +} + +define i8 @add_and_xor_extra_use(i8 %x, i8 %y) { +; CHECK-LABEL: @add_and_xor_extra_use( +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[X:%.*]], -1 +; CHECK-NEXT: call void @use(i8 [[XOR]]) +; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[Y]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %xor = xor i8 %x, -1 + call void @use(i8 %xor) + %and = and i8 %xor, %y + call void @use(i8 %and) + %add = add i8 %and, %x + ret i8 %add +} + +define i8 @add_xor_and_const(i8 %x) { +; CHECK-LABEL: @add_xor_and_const( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 42 +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], 42 +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[XOR]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %and = and i8 %x, 42 + %xor = xor i8 %and, 42 + %add = add i8 %xor, %x + ret i8 %add +} + +define i8 @add_xor_and_const_wrong_const(i8 %x) { +; CHECK-LABEL: @add_xor_and_const_wrong_const( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 42 +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], 88 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[XOR]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %and = and i8 %x, 42 + %xor = xor i8 %and, 88 + %add = add i8 %xor, %x + ret i8 %add +} + +define i8 @add_xor_and_var(i8 %x, i8 %y) { +; CHECK-LABEL: @add_xor_and_var( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[XOR]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %and = and i8 %x, %y + call void @use(i8 %and) + %xor = xor i8 %and, %y + %add = add i8 %xor, %x + ret i8 %add +} + +define i8 @add_xor_and_var_wrong_op1(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @add_xor_and_var_wrong_op1( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], [[Z:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[XOR]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %and = and i8 %x, %y + call void @use(i8 %and) + %xor = xor i8 %and, %z + %add = add i8 %xor, %x + ret i8 %add +} + +define i8 @add_xor_and_var_wrong_op2(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @add_xor_and_var_wrong_op2( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[XOR]], [[Z:%.*]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %and = and i8 %x, %y + call void @use(i8 %and) + %xor = xor i8 %and, %y + %add = add i8 %xor, %z + ret i8 %add +} + +define i8 @add_xor_and_var_commuted1(i8 %x, i8 %y) { +; CHECK-LABEL: @add_xor_and_var_commuted1( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[XOR]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %and = and i8 %y, %x + call void @use(i8 %and) + %xor = xor i8 %and, %y + %add = add i8 %xor, %x + ret i8 %add +} + +define i8 @add_xor_and_var_commuted2(i8 %x, i8 %_y) { +; CHECK-LABEL: @add_xor_and_var_commuted2( +; CHECK-NEXT: [[Y:%.*]] = udiv i8 42, [[_Y:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i8 [[Y]], [[X:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[Y]], [[AND]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[XOR]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %y = udiv i8 42, %_y ; thwart complexity-based canonicalization + %and = and i8 %x, %y + call void @use(i8 %and) + %xor = xor i8 %y, %and + %add = add i8 %xor, %x + ret i8 %add +} + +define i8 @add_xor_and_var_commuted3(i8 %x, i8 %_y) { +; CHECK-LABEL: @add_xor_and_var_commuted3( +; CHECK-NEXT: [[Y:%.*]] = udiv i8 42, [[_Y:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i8 [[Y]], [[X:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[Y]], [[AND]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[XOR]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %y = udiv i8 42, %_y ; thwart complexity-based canonicalization + %and = and i8 %y, %x + call void @use(i8 %and) + %xor = xor i8 %y, %and + %add = add i8 %xor, %x + ret i8 %add +} + +define i8 @add_xor_and_var_commuted4(i8 %_x, i8 %y) { +; CHECK-LABEL: @add_xor_and_var_commuted4( +; CHECK-NEXT: [[X:%.*]] = udiv i8 42, [[_X:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[X]], [[XOR]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %x = udiv i8 42, %_x ; thwart complexity-based canonicalization + %and = and i8 %x, %y + call void @use(i8 %and) + %xor = xor i8 %and, %y + %add = add i8 %x, %xor + ret i8 %add +} + +define i8 @add_xor_and_var_commuted5(i8 %_x, i8 %y) { +; CHECK-LABEL: @add_xor_and_var_commuted5( +; CHECK-NEXT: [[X:%.*]] = udiv i8 42, [[_X:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[X]], [[XOR]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %x = udiv i8 42, %_x ; thwart complexity-based canonicalization + %and = and i8 %y, %x + call void @use(i8 %and) + %xor = xor i8 %and, %y + %add = add i8 %x, %xor + ret i8 %add +} + +define i8 @add_xor_and_var_commuted6(i8 %_x, i8 %_y) { +; CHECK-LABEL: @add_xor_and_var_commuted6( +; CHECK-NEXT: [[X:%.*]] = udiv i8 42, [[_X:%.*]] +; CHECK-NEXT: [[Y:%.*]] = udiv i8 42, [[_Y:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Y]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[Y]], [[AND]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[X]], [[XOR]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %x = udiv i8 42, %_x ; thwart complexity-based canonicalization + %y = udiv i8 42, %_y ; thwart complexity-based canonicalization + %and = and i8 %x, %y + call void @use(i8 %and) + %xor = xor i8 %y, %and + %add = add i8 %x, %xor + ret i8 %add +} + +define i8 @add_xor_and_var_commuted7(i8 %_x, i8 %_y) { +; CHECK-LABEL: @add_xor_and_var_commuted7( +; CHECK-NEXT: [[X:%.*]] = udiv i8 42, [[_X:%.*]] +; CHECK-NEXT: [[Y:%.*]] = udiv i8 42, [[_Y:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i8 [[Y]], [[X]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[Y]], [[AND]] +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[X]], [[XOR]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %x = udiv i8 42, %_x ; thwart complexity-based canonicalization + %y = udiv i8 42, %_y ; thwart complexity-based canonicalization + %and = and i8 %y, %x + call void @use(i8 %and) + %xor = xor i8 %y, %and + %add = add i8 %x, %xor + ret i8 %add +} + +define i8 @add_xor_and_var_extra_use(i8 %x, i8 %y) { +; CHECK-LABEL: @add_xor_and_var_extra_use( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], [[Y]] +; CHECK-NEXT: call void @use(i8 [[XOR]]) +; CHECK-NEXT: [[ADD:%.*]] = or i8 [[XOR]], [[X]] +; CHECK-NEXT: ret i8 [[ADD]] +; + %and = and i8 %x, %y + call void @use(i8 %and) + %xor = xor i8 %and, %y + call void @use(i8 %xor) + %add = add i8 %xor, %x + ret i8 %add +} diff --git a/llvm/test/Transforms/InstCombine/atomic.ll b/llvm/test/Transforms/InstCombine/atomic.ll index ec79891..e3afffe 100644 --- a/llvm/test/Transforms/InstCombine/atomic.ll +++ b/llvm/test/Transforms/InstCombine/atomic.ll @@ -426,7 +426,6 @@ define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) { define i32 @atomic_load_from_constant_global() { ; CHECK-LABEL: @atomic_load_from_constant_global( -; CHECK-NEXT: [[V:%.*]] = load atomic i32, i32* @c seq_cst, align 4 ; CHECK-NEXT: ret i32 42 ; %v = load atomic i32, i32* @c seq_cst, align 4 @@ -435,7 +434,6 @@ define i32 @atomic_load_from_constant_global() { define i8 @atomic_load_from_constant_global_bitcast() { ; CHECK-LABEL: @atomic_load_from_constant_global_bitcast( -; CHECK-NEXT: [[V:%.*]] = load atomic i8, i8* bitcast (i32* @c to i8*) seq_cst, align 1 ; CHECK-NEXT: ret i8 42 ; %v = load atomic i8, i8* bitcast (i32* @c to i8*) seq_cst, align 1 diff --git a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll index 928409a4..ba38615 100644 --- a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll +++ b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll @@ -126,7 +126,7 @@ declare void @use.ptr(i8*) define i8* @gep_sub_self() { ; CHECK-LABEL: @gep_sub_self( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g to i64))) ; %p.int = ptrtoint i8* @g to i64 %p.int.neg = sub i64 0, %p.int @@ -136,7 +136,7 @@ define i8* @gep_sub_self() { define i8* @gep_sub_self_plus_addr(i64 %addr) { ; CHECK-LABEL: @gep_sub_self_plus_addr( -; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, i8* null, i64 [[ADDR:%.*]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g to i64))), i64 [[ADDR:%.*]] ; CHECK-NEXT: ret i8* [[P2]] ; %p.int = ptrtoint i8* @g to i64 @@ -164,7 +164,7 @@ define i8* @gep_plus_addr_sub_self_in_loop() { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[ADDR:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, i8* null, i64 [[ADDR]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g to i64))), i64 [[ADDR]] ; CHECK-NEXT: call void @use.ptr(i8* [[P2]]) ; CHECK-NEXT: br label [[LOOP]] ; @@ -182,7 +182,7 @@ loop: define i8* @gep_sub_other() { ; CHECK-LABEL: @gep_sub_other( -; CHECK-NEXT: ret i8* inttoptr (i64 sub (i64 ptrtoint (i8* @g to i64), i64 ptrtoint (i8* @g2 to i64)) to i8*) +; CHECK-NEXT: ret i8* getelementptr (i8, i8* @g, i64 sub (i64 0, i64 ptrtoint (i8* @g2 to i64))) ; %p.int = ptrtoint i8* @g2 to i64 %p.int.neg = sub i64 0, %p.int diff --git a/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll b/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll index 13bf554..85c9a01 100644 --- a/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll +++ b/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll @@ -346,7 +346,7 @@ define float @logb_test1(float %f) { ; LINUX-NEXT: [[LOGBF:%.*]] = call fast float @logbf(float [[F:%.*]]) ; LINUX-NEXT: ret float [[LOGBF]] ; MS32: [[POWF:%.*]] = call fast double @logb(double [[F:%.*]]) -; MS64-NEXT: [[LOGBF:%.*]] = call fast float @logbf(float [[F:%.*]]) +; MS64-NEXT: [[LOGBF:%.*]] = call fast float @_logbf(float [[F:%.*]]) ; %conv = fpext float %f to double %call = call fast double @logb(double %conv) diff --git a/llvm/test/Transforms/InstCombine/pr39177.ll b/llvm/test/Transforms/InstCombine/pr39177.ll index c13fc00..0671b5a 100644 --- a/llvm/test/Transforms/InstCombine/pr39177.ll +++ b/llvm/test/Transforms/InstCombine/pr39177.ll @@ -1,5 +1,7 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; +; Check that SimplifyLibCalls do not (crash or) emit a library call if user +; has made a function alias with the same name. %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } @@ -28,10 +30,8 @@ entry: define void @foo() { ; CHECK-LABEL: @foo( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @fwrite(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i64 7, i64 1, %struct._IO_FILE* [[TMP0]]) -; CHECK-NEXT: ret void +; CHECK-NOT: call i64 @fwrite( +; CHECK: call {{.*}} @fprintf( ; entry: %retval = alloca i32, align 4 diff --git a/llvm/test/Transforms/InstCombine/pr55228.ll b/llvm/test/Transforms/InstCombine/pr55228.ll new file mode 100644 index 0000000..5ef9f00 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr55228.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +target datalayout = "p:8:8" + +@g = external global i8 +@c = constant ptr getelementptr inbounds (i8, ptr @g, i64 1) + +define i1 @test(ptr %p) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P:%.*]], getelementptr inbounds (i8, ptr @g, i8 1) +; CHECK-NEXT: ret i1 [[CMP]] +; + %alloca = alloca ptr + call void @llvm.memcpy.p0.p0.i32(ptr %alloca, ptr @c, i32 0, i1 false) + %load = load ptr, ptr %alloca + %cmp = icmp eq ptr %p, %load + ret i1 %cmp +} + +declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll index fa686c9..4746587 100644 --- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll +++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll @@ -242,5 +242,18 @@ define i4 @strlen(i8* %s) { ret i4 0 } +; Test emission of stpncpy. +@a = dso_local global [4 x i8] c"123\00" +@b = dso_local global [5 x i8] zeroinitializer +declare i8* @__stpncpy_chk(i8* noundef, i8* noundef, i32 noundef, i32 noundef) +define signext i32 @emit_stpncpy() { +; CHECK-LABEL: @emit_stpncpy( +; CHECK-NEXT: call i8* @stpncpy({{.*}} @b, {{.*}} @a, {{.*}} i32 2) + %call = call i8* @__stpncpy_chk(i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), + i8* noundef getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), + i32 noundef 2, i32 noundef 5) + ret i32 0 +} + attributes #0 = { nobuiltin } attributes #1 = { builtin } diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll index 44da8b7..b08c1eb 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll @@ -1,15 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -O3 -mtriple=x86_64-- -mcpu=core2 | FileCheck %s declare i1 @check() nounwind declare i1 @foo(i8*, i8*, i8*) nounwind ; Check that redundant phi elimination ran -; CHECK: @test -; CHECK: %while.body.i -; CHECK: movs -; CHECK-NOT: movs -; CHECK: %for.end.i define i32 @test(i8* %base) nounwind uwtable ssp { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %rbx, -48 +; CHECK-NEXT: .cfi_offset %r12, -40 +; CHECK-NEXT: .cfi_offset %r13, -32 +; CHECK-NEXT: .cfi_offset %r14, -24 +; CHECK-NEXT: .cfi_offset %r15, -16 +; CHECK-NEXT: movq %rdi, %r14 +; CHECK-NEXT: leaq 16(%rdi), %r15 +; CHECK-NEXT: movl $16, %eax +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %while.body.i +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_2 Depth 2 +; CHECK-NEXT: movslq %r12d, %r13 +; CHECK-NEXT: movq %rax, %r12 +; CHECK-NEXT: leaq (%r15,%r13), %rbx +; CHECK-NEXT: addq $16, %r13 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %for.body.i +; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: callq check@PLT +; CHECK-NEXT: incq %rbx +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.3: # %for.end.i +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: addq %r14, %r13 +; CHECK-NEXT: movq %r13, %rdi +; CHECK-NEXT: movq %r13, %rsi +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_5 +; CHECK-NEXT: # %bb.4: # %cond.true29.i +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: leaq 16(%r12), %rax +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_5: # %cond.false35.i entry: br label %while.body.lr.ph.i @@ -58,11 +104,20 @@ exit: ; preds = %cond.true29.i, %cond.true.i ; Test phi reuse after LSR that requires SCEVExpander to hoist an ; interesting GEP. ; -; CHECK: @test2 -; CHECK: %entry -; CHECK-NOT: mov -; CHECK: je define void @test2(i32 %n) nounwind uwtable { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB1_1 +; CHECK-NEXT: # %bb.3: # %while.end +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_1: # %for.cond468.preheader +; CHECK-NEXT: cmpl $2, %edi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_2: # %for.inc498 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB1_2 entry: br i1 undef, label %while.end, label %for.cond468 @@ -95,16 +150,36 @@ while.end: ; preds = %entry ; Test redundant phi elimination when the deleted phi's increment is ; itself a phi. ; -; CHECK: @test3 -; CHECK: %meshBB1 -; CHECK: %meshBB -; CHECK-NEXT: Parent Loop -; CHECK-NEXT: Inner Loop -; CHECK-NEXT: incq -; CHECK: testb -; CHECK: je -; CHECK: jmp define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB2_6 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB2_2: # %meshBB1 +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB2_4 Depth 2 +; CHECK-NEXT: # implicit-def: $rcx +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB2_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB2_4: # %meshBB +; CHECK-NEXT: # Parent Loop BB2_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: incq %rcx +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB2_4 +; CHECK-NEXT: jmp .LBB2_2 +; CHECK-NEXT: .LBB2_6: # %meshBB5 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB2_5 +; CHECK-NEXT: # %bb.7: # %eval_At_times_u.exit +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB2_5: # %for.inc8.us.i2 entry: br i1 undef, label %meshBB1, label %meshBB5 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll index feafd88..56d8a07 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll @@ -320,10 +320,20 @@ for.end: ; ADD (with reduction stored in invariant address) -; CHECK-REMARK: loop not vectorized: value that could not be identified as reduction is used outside the loop +; CHECK-REMARK: vectorized loop (vectorization width: vscale x 4, interleaved count: 2) define void @invariant_store(i32* %dst, i32* readonly %src) { ; CHECK-LABEL: @invariant_store -; CHECK-NOT: vector.body +; CHECK: vector.body: +; CHECK: %[[LOAD1:.*]] = load <vscale x 4 x i32> +; CHECK: %[[LOAD2:.*]] = load <vscale x 4 x i32> +; CHECK: %[[ADD1:.*]] = add <vscale x 4 x i32> %{{.*}}, %[[LOAD1]] +; CHECK: %[[ADD2:.*]] = add <vscale x 4 x i32> %{{.*}}, %[[LOAD2]] +; CHECK: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[ADD1]] +; CHECK: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[ADD2]] +; CHECK: middle.block: +; CHECK: %[[ADD:.*]] = add <vscale x 4 x i32> %[[ADD2]], %[[ADD1]] +; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %[[ADD]]) +; CHECK-NEXT: store i32 %[[SUM]], i32* %gep.dst, align 4 entry: %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 store i32 0, i32* %gep.dst, align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll index a0dba42..6212d82 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll @@ -1139,10 +1139,10 @@ define float @fmuladd_scalar_vf(float* %a, float* %b, i64 %n) { ; CHECK-UNORDERED: [[LOAD5:%.*]] = load float, float* ; CHECK-UNORDERED: [[LOAD6:%.*]] = load float, float* ; CHECK-UNORDERED: [[LOAD7:%.*]] = load float, float* -; CHECK-UNORDERED: [[FMULADD]] = call float @llvm.fmuladd.f32(float [[LOAD]], float [[LOAD4]], float [[VEC_PHI]]) -; CHECK-UNORDERED: [[FMULADD1]] = call float @llvm.fmuladd.f32(float [[LOAD1]], float [[LOAD5]], float [[VEC_PHI1]]) -; CHECK-UNORDERED: [[FMULADD2]] = call float @llvm.fmuladd.f32(float [[LOAD2]], float [[LOAD6]], float [[VEC_PHI2]]) -; CHECK-UNORDERED: [[FMULADD3]] = call float @llvm.fmuladd.f32(float [[LOAD3]], float [[LOAD7]], float [[VEC_PHI3]]) +; CHECK-UNORDERED: [[FMULADD]] = tail call float @llvm.fmuladd.f32(float [[LOAD]], float [[LOAD4]], float [[VEC_PHI]]) +; CHECK-UNORDERED: [[FMULADD1]] = tail call float @llvm.fmuladd.f32(float [[LOAD1]], float [[LOAD5]], float [[VEC_PHI1]]) +; CHECK-UNORDERED: [[FMULADD2]] = tail call float @llvm.fmuladd.f32(float [[LOAD2]], float [[LOAD6]], float [[VEC_PHI2]]) +; CHECK-UNORDERED: [[FMULADD3]] = tail call float @llvm.fmuladd.f32(float [[LOAD3]], float [[LOAD7]], float [[VEC_PHI3]]) ; CHECK-UNORDERED-NOT: llvm.vector.reduce.fadd ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED: [[BIN_RDX:%.*]] = fadd float [[FMULADD1]], [[FMULADD]] diff --git a/llvm/test/Transforms/LoopVectorize/fpsat.ll b/llvm/test/Transforms/LoopVectorize/fpsat.ll new file mode 100644 index 0000000..7e3f743 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/fpsat.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s + +define void @signed(ptr %x, ptr %y, i32 %n) { +; CHECK-LABEL: @signed( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[Y]], [[UGLYGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[X]], [[UGLYGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[TMP8]]) +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %0) + %arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv + store i32 %1, ptr %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @unsigned(ptr %x, ptr %y, i32 %n) { +; CHECK-LABEL: @unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[Y]], [[UGLYGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[X]], [[UGLYGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope !8 +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4, !alias.scope !11, !noalias !8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[TMP8]]) +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %1 = tail call i32 @llvm.fptoui.sat.i32.f32(float %0) + %arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv + store i32 %1, ptr %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +declare i32 @llvm.fptosi.sat.i32.f32(float) +declare i32 @llvm.fptoui.sat.i32.f32(float) diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-call.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-call.ll index e92400e..5cca6a6 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-with-call.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-with-call.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[IV_STEPS:%.]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<%start>, ir<1> -; CHECK-NEXT: WIDEN-CALL ir<%min> = call @llvm.smin.i32(vp<[[IV_STEPS]]>, ir<65535>) +; CHECK-NEXT: CLONE ir<%min> = call vp<[[IV_STEPS]]>, ir<65535>, ir<@llvm.smin.i32> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%dst>, vp<[[IV_STEPS]]> ; CHECK-NEXT: CLONE store ir<%min>, ir<%arrayidx> ; CHECK-NEXT: EMIT vp<[[INC:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]> @@ -27,8 +27,8 @@ define void @test(i32 %start, ptr %dst) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 %start, [[INDEX]] ; CHECK-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[INDUCTION1:%.*]] = add i32 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[INDUCTION]], i32 65535) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[INDUCTION1]], i32 65535) +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION]], i32 65535) +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION1]], i32 65535) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDUCTION]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDUCTION1]] ; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index d002b1b..862a384 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -11,7 +11,23 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; dst[42] = sum; ; } ; CHECK-LABEL: @reduc_store -; CHECK-NOT: vector.body +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: store i32 [[TMP6]], i32* [[GEP_DST:%.*]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] define void @reduc_store(i32* %dst, i32* readonly %src) { entry: %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 @@ -41,7 +57,14 @@ exit: ; dst[42] = sum; ; } ; CHECK-LABEL: @reduc_store_fadd_fast -; CHECK-NOT: vector.body +; CHECK: vector.body: +; CHECK: phi <4 x float> +; CHECK: load <4 x float> +; CHECK: fadd fast <4 x float> +; CHECK-NOT: store float %{{[0-9]+}}, float* %gep.dst +; CHECK: middle.block: +; CHECK-NEXT: [[TMP:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32 +; CHECK-NEXT: store float %{{[0-9]+}}, float* %gep.dst define void @reduc_store_fadd_fast(float* %dst, float* readonly %src) { entry: %gep.dst = getelementptr inbounds float, float* %dst, i64 42 @@ -152,7 +175,55 @@ for.end: ; dst[42] = sum; ; } ; CHECK-LABEL: @reduc_store_inside_unrolled -; CHECK-NOT: vector.body +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP7]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP11]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP17:%.*]] = or <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1> +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP19]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP21]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP23]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP25]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP27]], i32 1 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 2 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3 +; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP33]], [[TMP16]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8> +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP34]]) +; CHECK-NEXT: store i32 [[TMP36]], i32* [[GEP_DST:%.*]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 500, 500 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] define void @reduc_store_inside_unrolled(i32* %dst, i32* readonly %src) { entry: %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 @@ -160,7 +231,7 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ] + %sum = phi i32 [ 0, %entry ], [ %sum.2, %for.body ] %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv %0 = load i32, i32* %gep.src, align 4 %sum.1 = add nsw i32 %0, %sum @@ -178,6 +249,38 @@ exit: ret void } +; Check that we cannot vectorize code if stored value is not the final reduction +; value +; +; int sum = 0; +; for(int i=0; i < 1000; i++) { +; sum += src[i]; +; dst[42] = sum + 1; +; } +; CHECK-LABEL: @reduc_store_not_final_value +; CHECK-NOT: vector.body: +define void @reduc_store_not_final_value(i32* %dst, i32* readonly %src) { +entry: + %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 + store i32 0, i32* %gep.dst, align 4 + br label %for.body + +for.body: + %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv + %0 = load i32, i32* %gep.src, align 4 + %add = add nsw i32 %sum, %0 + %sum_plus_one = add i32 %add, 1 + store i32 %sum_plus_one, i32* %gep.dst, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %exit, label %for.body + +exit: + ret void +} + ; We cannot vectorize if two (or more) invariant stores exist in a loop. ; ; int sum = 0; @@ -197,7 +300,7 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ] + %sum = phi i32 [ 0, %entry ], [ %sum.2, %for.body ] %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv %0 = load i32, i32* %arrayidx, align 4 %sum.1 = add nsw i32 %0, %sum @@ -224,7 +327,12 @@ exit: ; dst[42] = sum; ; } ; CHECK-LABEL: @reduc_store_middle_store_predicated -; CHECK-NOT: vector.body +; CHECK: vector.body: +; CHECK-NOT: store i32 %{{[0-9]+}}, i32* %gep.dst +; CHECK: middle.block: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.vector.reduce.add.v4i32 +; CHECK-NEXT: store i32 [[TMP]], i32* %gep.dst +; CHECK: ret void define void @reduc_store_middle_store_predicated(i32* %dst, i32* readonly %src) { entry: %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 @@ -299,6 +407,36 @@ exit: ; preds = %latch ret void } +; Final reduction value is overwritten inside loop +; +; for(int i=0; i < 1000; i++) { +; sum += src[i]; +; dst[42] = sum; +; dst[42] = 0; +; } +; CHECK-LABEL: @reduc_store_final_store_overwritten +; CHECK-NOT: vector.body: +define void @reduc_store_final_store_overwritten(i32* %dst, i32* readonly %src) { +entry: + %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 + br label %for.body + +for.body: + %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv + %0 = load i32, i32* %gep.src, align 4 + %add = add nsw i32 %sum, %0 + store i32 %add, i32* %gep.dst, align 4 + store i32 0, i32* %gep.dst, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %exit, label %for.body + +exit: + ret void +} + ; Final value used outside of loop does not prevent vectorization ; ; int sum = 0; @@ -308,7 +446,16 @@ exit: ; preds = %latch ; } ; dst[43] = sum; ; CHECK-LABEL: @reduc_store_inoutside -; CHECK-NOT: vector.body +; CHECK: vector.body: +; CHECK-NOT: store i32 %{{[0-9]+}}, i32* %gep.src +; CHECK: middle.block: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.vector.reduce.add.v4i32 +; CHECK-NEXT: store i32 [[TMP]], i32* %gep.dst +; CHECK: exit: +; CHECK: [[PHI:%.*]] = phi i32 [ [[TMP1:%.*]], %for.body ], [ [[TMP2:%.*]], %middle.block ] +; CHECK: [[ADDR:%.*]] = getelementptr inbounds i32, i32* %dst, i64 43 +; CHECK: store i32 [[PHI]], i32* [[ADDR]] +; CHECK: ret void define void @reduc_store_inoutside(i32* %dst, i32* readonly %src) { entry: %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 2582791..81593f9 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -146,6 +146,50 @@ for.end: ; preds = %for.body, %entry ret float %red.next } +define void @print_reduction_with_invariant_store(i64 %n, float* noalias %y, float* noalias %dst) { +; CHECK-LABEL: Checking a loop in 'print_reduction_with_invariant_store' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: <x1> vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> +; CHECK-NEXT: vp<[[IV:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<0>, ir<1> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, vp<[[IV]]> +; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> +; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop) +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %red = phi float [ %red.next, %for.body ], [ 0.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %arrayidx, align 4 + %red.next = fadd fast float %lv, %red + store float %red.next, float* %dst, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + define void @print_replicate_predicated_phi(i64 %n, i64* %x) { ; CHECK-LABEL: Checking a loop in 'print_replicate_predicated_phi' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index 76e0f39..00ac126 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -90,23 +90,16 @@ return: define float @test_merge_anyof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_merge_anyof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0 -; CHECK-NEXT: [[OR_COND3:%.*]] = or i1 [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 -; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[OR_COND3]], [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 -; CHECK-NEXT: [[OR_COND5:%.*]] = or i1 [[OR_COND4]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 -; CHECK-NEXT: [[OR_COND6:%.*]] = or i1 [[OR_COND5]], [[TMP7]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR]] -; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP8]], i64 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: [[T_FR7:%.*]] = freeze <4 x float> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR7]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[T_FR7]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i1> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP3]], 0 +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR7]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[SHIFT]], [[T_FR7]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP4]], i64 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[ADD]], float 0.000000e+00 ; CHECK-NEXT: ret float [[RETVAL_0]] ; entry: @@ -419,24 +412,16 @@ return: define float @test_merge_anyof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_merge_anyof_v4si( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1> -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0 -; CHECK-NEXT: [[OR_COND3:%.*]] = or i1 [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 -; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[OR_COND3]], [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 -; CHECK-NEXT: [[OR_COND5:%.*]] = or i1 [[OR_COND4]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 -; CHECK-NEXT: [[OR_COND6:%.*]] = or i1 [[OR_COND5]], [[TMP7]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]] -; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP8]], i64 0 +; CHECK-NEXT: [[T_FR7:%.*]] = freeze <4 x i32> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i32> [[T_FR7]], <i32 -256, i32 -256, i32 -256, i32 -256> +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[TMP0]], <i32 -255, i32 -255, i32 -255, i32 -255> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i1> [[TMP1]] to i4 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP2]], 0 +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR7]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR7]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]] +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[CONV]], float 0.000000e+00 ; CHECK-NEXT: ret float [[RETVAL_0]] ; entry: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index 3dd7578..db4f06b 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -41,7 +41,7 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 4, i32 2, i32 5, i32 6> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 6> ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP2]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll new file mode 100644 index 0000000..90eaa71 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer -mtriple=aarch64-none-eabi < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +define void @signed(ptr %x, ptr %y, i32 %n) { +; CHECK-LABEL: @signed( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load float, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1 + %l2 = load float, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2 + %l4 = load float, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3 + %l6 = load float, ptr %arrayidx.3, align 4 + %l1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l0) + %l3 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l2) + %l5 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l4) + %l7 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l6) + store i32 %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1 + store i32 %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2 + store i32 %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3 + store i32 %l7, ptr %arrayidx2.3, align 4 + ret void +} + +define void @unsigned(ptr %x, ptr %y, i32 %n) { +; CHECK-LABEL: @unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %l0 = load float, ptr %x, align 4 + %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1 + %l2 = load float, ptr %arrayidx.1, align 4 + %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2 + %l4 = load float, ptr %arrayidx.2, align 4 + %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3 + %l6 = load float, ptr %arrayidx.3, align 4 + %l1 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l0) + %l3 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l2) + %l5 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l4) + %l7 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l6) + store i32 %l1, ptr %y, align 4 + %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1 + store i32 %l3, ptr %arrayidx2.1, align 4 + %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2 + store i32 %l5, ptr %arrayidx2.2, align 4 + %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3 + store i32 %l7, ptr %arrayidx2.3, align 4 + ret void +} + +declare i32 @llvm.fptosi.sat.i32.f32(float) +declare i32 @llvm.fptoui.sat.i32.f32(float) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll index 01d743f..4142e31 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -15,10 +15,10 @@ define void @PR28330(i32 %n) { ; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: -; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80> ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) -; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]] +; DEFAULT-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR28330( @@ -27,10 +27,10 @@ define void @PR28330(i32 %n) { ; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: -; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80> ; GATHER-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) -; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]] +; GATHER-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR28330( @@ -39,10 +39,10 @@ define void @PR28330(i32 %n) { ; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: -; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80> ; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) -; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]] +; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] ; MAX-COST-NEXT: br label [[FOR_BODY]] ; entry: @@ -92,10 +92,10 @@ define void @PR32038(i32 %n) { ; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: -; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80> ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) -; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5 +; DEFAULT-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR32038( @@ -104,10 +104,10 @@ define void @PR32038(i32 %n) { ; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: -; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80> ; GATHER-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) -; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5 +; GATHER-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR32038( @@ -116,10 +116,10 @@ define void @PR32038(i32 %n) { ; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: -; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80> ; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) -; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5 +; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 ; MAX-COST-NEXT: br label [[FOR_BODY]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll index 24c69fb..12a2cd2 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -204,10 +204,10 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) { define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @reduction_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 7, i32 2> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 6, i32 3> +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 3, i32 6> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 2, i32 7> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], <i32 15, i32 15, i32 15, i32 15> ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], <i32 65537, i32 65537, i32 65537, i32 65537> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll index 962f8ca..03a29f4 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -204,10 +204,10 @@ define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) { define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @reduction_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 7, i32 2> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 6, i32 3> +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 3, i32 6> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 2, i32 7> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], <i32 15, i32 15, i32 15, i32 15> ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], <i32 65537, i32 65537, i32 65537, i32 65537> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll index 6817269..0d20a0e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll @@ -8,7 +8,7 @@ define void @mainTest(i32* %ptr) #0 { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 @@ -17,10 +17,10 @@ define void @mainTest(i32* %ptr) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP7]], 1 -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = add i32 [[OP_EXTRA]], [[TMP4]] -; CHECK-NEXT: [[OP_EXTRA2:%.*]] = add i32 [[OP_EXTRA1]], [[TMP3]] -; CHECK-NEXT: [[OP_EXTRA3]] = add i32 [[OP_EXTRA2]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[TMP3]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[OP_RDX1]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX3]] = add i32 [[OP_RDX2]], 1 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: bail_out: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index d15494e..5455914 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -7,20 +7,20 @@ define void @test() #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX1:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], <i64 3, i64 2, i64 1, i64 0> +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], <i64 2, i64 3, i64 1, i64 0> ; CHECK-NEXT: [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3 ; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1 ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32 ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP2]] ; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32> ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP7]], 0 -; CHECK-NEXT: [[OP_EXTRA1]] = add i64 [[OP_EXTRA]], [[TMP3]] +; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[OP_RDX1]] = add i64 [[OP_RDX]], 0 ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll index 44f1fb1..318d452 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -5,98 +5,98 @@ define void @Test(i32) { ; CHECK-LABEL: @Test( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP0]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP0]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP0]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> poison, i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[TMP0]], i32 5 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[TMP0]], i32 6 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP0]], i32 7 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 [[TMP0]], i32 8 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP0]], i32 9 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[TMP0]], i32 10 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP0]], i32 11 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[TMP0]], i32 12 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP0]], i32 13 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[TMP0]], i32 14 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP0]], i32 15 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP10:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685> -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA2:%.*]] = and i32 [[OP_EXTRA1]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA2]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] -; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_EXTRA26]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = and <2 x i32> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP10]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> <i32 0, i32 3> +; CHECK-NEXT: [[TMP25:%.*]] = phi <2 x i32> [ [[TMP36:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP25]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 +; CHECK-NEXT: [[TMP27:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685> +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP24]]) +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP8]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP27]]) +; CHECK-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[TMP30]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = and i32 [[OP_RDX1]], [[TMP0]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = and i32 [[OP_RDX2]], [[TMP0]] +; CHECK-NEXT: [[OP_RDX4:%.*]] = and i32 [[OP_RDX3]], [[TMP0]] +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_RDX4]], i32 0 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> poison, i32 [[TMP26]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> [[TMP32]], i32 [[TMP26]], i32 1 +; CHECK-NEXT: [[TMP34:%.*]] = and <2 x i32> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = add <2 x i32> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[TMP36]] = shufflevector <2 x i32> [[TMP34]], <2 x i32> [[TMP35]], <2 x i32> <i32 0, i32 3> ; CHECK-NEXT: br label [[LOOP]] ; ; FORCE_REDUCTION-LABEL: @Test( ; FORCE_REDUCTION-NEXT: entry: +; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0:%.*]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP0]], i32 1 +; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP0]], i32 2 +; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP0]], i32 3 +; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP0]], i32 4 +; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP0]], i32 5 +; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP0]], i32 6 +; FORCE_REDUCTION-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP0]], i32 7 +; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> poison, i32 [[TMP0]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[TMP0]], i32 1 +; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[TMP0]], i32 2 +; FORCE_REDUCTION-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[TMP0]], i32 3 +; FORCE_REDUCTION-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[TMP0]], i32 4 +; FORCE_REDUCTION-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[TMP0]], i32 5 +; FORCE_REDUCTION-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[TMP0]], i32 6 +; FORCE_REDUCTION-NEXT: [[TMP16:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP0]], i32 7 +; FORCE_REDUCTION-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 [[TMP0]], i32 8 +; FORCE_REDUCTION-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP0]], i32 9 +; FORCE_REDUCTION-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[TMP0]], i32 10 +; FORCE_REDUCTION-NEXT: [[TMP20:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP0]], i32 11 +; FORCE_REDUCTION-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[TMP0]], i32 12 +; FORCE_REDUCTION-NEXT: [[TMP22:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP0]], i32 13 +; FORCE_REDUCTION-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[TMP0]], i32 14 +; FORCE_REDUCTION-NEXT: [[TMP24:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP0]], i32 15 ; FORCE_REDUCTION-NEXT: br label [[LOOP:%.*]] ; FORCE_REDUCTION: loop: -; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP12:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] -; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> -; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 -; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240> -; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496 -; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555 -; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP3]]) -; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]] -; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0:%.*]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA1:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA2:%.*]] = and i32 [[OP_EXTRA1]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA2]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]] -; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP2]] -; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529 -; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA27]], [[VAL_39]] -; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685 -; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[VAL_40]], i32 0 -; FORCE_REDUCTION-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1 -; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[VAL_41]], i32 0 -; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = and <2 x i32> [[TMP8]], [[TMP9]] -; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = add <2 x i32> [[TMP8]], [[TMP9]] -; FORCE_REDUCTION-NEXT: [[TMP12]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> <i32 0, i32 3> +; FORCE_REDUCTION-NEXT: [[TMP25:%.*]] = phi <2 x i32> [ [[TMP36:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP25]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; FORCE_REDUCTION-NEXT: [[TMP26:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685> +; FORCE_REDUCTION-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP24]]) +; FORCE_REDUCTION-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP8]]) +; FORCE_REDUCTION-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP26]]) +; FORCE_REDUCTION-NEXT: [[OP_RDX13:%.*]] = and i32 [[TMP29]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_RDX14:%.*]] = and i32 [[OP_RDX13]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_RDX15:%.*]] = and i32 [[OP_RDX14]], [[TMP0]] +; FORCE_REDUCTION-NEXT: [[OP_RDX16:%.*]] = and i32 [[OP_RDX15]], [[TMP27]] +; FORCE_REDUCTION-NEXT: [[OP_RDX17:%.*]] = and i32 [[OP_RDX16]], [[TMP28]] +; FORCE_REDUCTION-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_RDX17]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP31:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 +; FORCE_REDUCTION-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> poison, i32 [[TMP31]], i32 0 +; FORCE_REDUCTION-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> [[TMP32]], i32 [[TMP31]], i32 1 +; FORCE_REDUCTION-NEXT: [[TMP34:%.*]] = and <2 x i32> [[TMP30]], [[TMP33]] +; FORCE_REDUCTION-NEXT: [[TMP35:%.*]] = add <2 x i32> [[TMP30]], [[TMP33]] +; FORCE_REDUCTION-NEXT: [[TMP36]] = shufflevector <2 x i32> [[TMP34]], <2 x i32> [[TMP35]], <2 x i32> <i32 0, i32 3> ; FORCE_REDUCTION-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll index f878bda..ba0585f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -4,20 +4,20 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) { ; CHECK-LABEL: @mainTest( ; CHECK-NEXT: bci_15.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 31, i32 poison>, i32 [[PARAM:%.*]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 31>, i32 [[PARAM:%.*]], i32 0 ; CHECK-NEXT: br label [[BCI_15:%.*]] ; CHECK: bci_15: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 -1> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 -1, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: store atomic i32 [[TMP4]], i32* [[VALS:%.*]] unordered, align 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP3]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[V44]], i32 0 -; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0 +; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[V44]], i32 1 ; CHECK-NEXT: br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]] ; CHECK: loopexit: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll index e911d9c..9a9e6b4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll @@ -60,7 +60,7 @@ define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) { define i8 @i(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @i( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]], <4 x i32> <i32 2, i32 1, i32 7, i32 4> ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]]) ; CHECK-NEXT: ret i8 [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll index e3ee84f..16da1ee 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll @@ -60,7 +60,7 @@ define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) { define i8 @i(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @i( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> <i32 0, i32 3, i32 5, i32 6> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]], <4 x i32> <i32 2, i32 1, i32 7, i32 4> ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]]) ; CHECK-NEXT: ret i8 [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll index 8db775a..3178b34 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll @@ -140,6 +140,32 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { ret <8 x i16> %r07 } +; PR41892 +define void @test_v4f32_v2f32_store(<4 x float> %f, float* %p){ +; CHECK-LABEL: @test_v4f32_v2f32_store( +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1 +; CHECK-NEXT: [[ADD01:%.*]] = fadd float [[X0]], [[X1]] +; CHECK-NEXT: store float [[ADD01]], float* [[P:%.*]], align 4 +; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[F]], i64 2 +; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[F]], i64 3 +; CHECK-NEXT: [[ADD23:%.*]] = fadd float [[X2]], [[X3]] +; CHECK-NEXT: [[P23:%.*]] = getelementptr inbounds float, float* [[P]], i64 1 +; CHECK-NEXT: store float [[ADD23]], float* [[P23]], align 4 +; CHECK-NEXT: ret void +; + %x0 = extractelement <4 x float> %f, i64 0 + %x1 = extractelement <4 x float> %f, i64 1 + %add01 = fadd float %x0, %x1 + store float %add01, float* %p, align 4 + %x2 = extractelement <4 x float> %f, i64 2 + %x3 = extractelement <4 x float> %f, i64 3 + %add23 = fadd float %x2, %x3 + %p23 = getelementptr inbounds float, float* %p, i64 1 + store float %add23, float * %p23, align 4 + ret void +} + ; ; 256-bit vectors ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll index 3f332c1..ce2aa23 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -140,6 +140,32 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { ret <8 x i16> %r07 } +; PR41892 +define void @test_v4f32_v2f32_store(<4 x float> %f, float* %p){ +; CHECK-LABEL: @test_v4f32_v2f32_store( +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1 +; CHECK-NEXT: [[ADD01:%.*]] = fadd float [[X0]], [[X1]] +; CHECK-NEXT: store float [[ADD01]], float* [[P:%.*]], align 4 +; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[F]], i64 2 +; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[F]], i64 3 +; CHECK-NEXT: [[ADD23:%.*]] = fadd float [[X2]], [[X3]] +; CHECK-NEXT: [[P23:%.*]] = getelementptr inbounds float, float* [[P]], i64 1 +; CHECK-NEXT: store float [[ADD23]], float* [[P23]], align 4 +; CHECK-NEXT: ret void +; + %x0 = extractelement <4 x float> %f, i64 0 + %x1 = extractelement <4 x float> %f, i64 1 + %add01 = fadd float %x0, %x1 + store float %add01, float* %p, align 4 + %x2 = extractelement <4 x float> %f, i64 2 + %x3 = extractelement <4 x float> %f, i64 3 + %add23 = fadd float %x2, %x3 + %p23 = getelementptr inbounds float, float* %p, i64 1 + store float %add23, float * %p23, align 4 + ret void +} + ; ; 256-bit vectors ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index d9ca7d5..048c567 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -13,58 +13,30 @@ define float @baz() { ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16 -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x float> poison, float [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[TMP5]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[TMP10]], i32 4 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP15]], float [[TMP9]], i32 5 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP16]], float [[TMP5]], i32 6 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x float> [[TMP17]], float [[TMP4]], i32 7 -; CHECK-NEXT: [[TMP19:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP18]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP19]], [[CONV]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; CHECK-NEXT: store float [[OP_EXTRA1]], float* @res, align 4 -; CHECK-NEXT: ret float [[OP_EXTRA1]] +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> +; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[SHUFFLE]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]] +; CHECK-NEXT: store float [[OP_RDX1]], float* @res, align 4 +; CHECK-NEXT: ret float [[OP_RDX1]] ; ; THRESHOLD-LABEL: @baz( ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4 ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3 ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float -; THRESHOLD-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16 -; THRESHOLD-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16 -; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; THRESHOLD-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8 -; THRESHOLD-NEXT: [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8 -; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]] -; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 -; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 -; THRESHOLD-NEXT: [[TMP11:%.*]] = insertelement <8 x float> poison, float [[TMP10]], i32 0 -; THRESHOLD-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[TMP9]], i32 1 -; THRESHOLD-NEXT: [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[TMP5]], i32 2 -; THRESHOLD-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[TMP4]], i32 3 -; THRESHOLD-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[TMP10]], i32 4 -; THRESHOLD-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP15]], float [[TMP9]], i32 5 -; THRESHOLD-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP16]], float [[TMP5]], i32 6 -; THRESHOLD-NEXT: [[TMP18:%.*]] = insertelement <8 x float> [[TMP17]], float [[TMP4]], i32 7 -; THRESHOLD-NEXT: [[TMP19:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP18]]) -; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP19]], [[CONV]] -; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; THRESHOLD-NEXT: store float [[OP_EXTRA1]], float* @res, align 4 -; THRESHOLD-NEXT: ret float [[OP_EXTRA1]] +; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 +; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 +; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] +; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> +; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[SHUFFLE]]) +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]] +; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]] +; THRESHOLD-NEXT: store float [[OP_RDX1]], float* @res, align 4 +; THRESHOLD-NEXT: ret float [[OP_RDX1]] ; entry: %0 = load i32, i32* @n, align 4 @@ -107,10 +79,10 @@ define float @bazz() { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] -; CHECK-NEXT: store float [[OP_EXTRA1]], float* @res, align 4 -; CHECK-NEXT: ret float [[OP_EXTRA1]] +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]] +; CHECK-NEXT: store float [[OP_RDX1]], float* @res, align 4 +; CHECK-NEXT: ret float [[OP_RDX1]] ; ; THRESHOLD-LABEL: @bazz( ; THRESHOLD-NEXT: entry: @@ -123,10 +95,10 @@ define float @bazz() { ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]]) -; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] -; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] -; THRESHOLD-NEXT: store float [[OP_EXTRA1]], float* @res, align 4 -; THRESHOLD-NEXT: ret float [[OP_EXTRA1]] +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]] +; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]] +; THRESHOLD-NEXT: store float [[OP_RDX1]], float* @res, align 4 +; THRESHOLD-NEXT: ret float [[OP_RDX1]] ; entry: %0 = load i32, i32* @n, align 4 @@ -343,25 +315,25 @@ entry: define float @f(float* nocapture readonly %x) { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <16 x float>* -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* -; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_32]] to <16 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP3]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret float [[OP_RDX]] ; ; THRESHOLD-LABEL: @f( ; THRESHOLD-NEXT: entry: -; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <16 x float>* -; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 -; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* -; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 -; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP3]]) -; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]]) +; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>* +; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 +; THRESHOLD-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32 +; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_32]] to <16 x float>* +; THRESHOLD-NEXT: [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[TMP2]], align 4 +; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]]) +; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP3]]) ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] ; THRESHOLD-NEXT: ret float [[OP_RDX]] ; @@ -519,8 +491,8 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] -; CHECK-NEXT: ret float [[OP_EXTRA]] +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]] +; CHECK-NEXT: ret float [[OP_RDX]] ; ; THRESHOLD-LABEL: @f1( ; THRESHOLD-NEXT: entry: @@ -529,8 +501,8 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]]) -; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] -; THRESHOLD-NEXT: ret float [[OP_EXTRA]] +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]] +; THRESHOLD-NEXT: ret float [[OP_RDX]] ; entry: %rem = srem i32 %a, %b @@ -637,50 +609,50 @@ define float @loadadd31(float* nocapture readonly %x) { ; CHECK-LABEL: @loadadd31( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* -; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 -; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* -; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]]) -; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <16 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <8 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x float>, <8 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_24]] to <4 x float>* +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 +; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 +; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_28]], align 4 +; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 +; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX_29]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) +; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]] -; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] -; CHECK-NEXT: ret float [[TMP12]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP6]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP7]] +; CHECK-NEXT: ret float [[OP_RDX3]] ; ; THRESHOLD-LABEL: @loadadd31( ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 -; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 -; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 -; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 -; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* -; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 -; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* -; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 -; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 -; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* -; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 -; THRESHOLD-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP7]]) -; THRESHOLD-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP5]]) +; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <16 x float>* +; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 +; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 +; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <8 x float>* +; THRESHOLD-NEXT: [[TMP3:%.*]] = load <8 x float>, <8 x float>* [[TMP2]], align 4 +; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 +; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_24]] to <4 x float>* +; THRESHOLD-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 +; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 +; THRESHOLD-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX_28]], align 4 +; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 +; THRESHOLD-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX_29]], align 4 +; THRESHOLD-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]]) +; THRESHOLD-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]]) ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] -; THRESHOLD-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) +; THRESHOLD-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) ; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] -; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX1]], [[TMP1]] -; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] -; THRESHOLD-NEXT: ret float [[TMP12]] +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP6]] +; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP7]] +; THRESHOLD-NEXT: ret float [[OP_RDX3]] ; entry: %arrayidx = getelementptr inbounds float, float* %x, i64 1 @@ -780,25 +752,25 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; CHECK-NEXT: ret float [[OP_EXTRA1]] +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00 +; CHECK-NEXT: ret float [[OP_RDX2]] ; ; THRESHOLD-LABEL: @extra_args( ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) -; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] -; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; THRESHOLD-NEXT: ret float [[OP_EXTRA1]] +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]] +; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]] +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00 +; THRESHOLD-NEXT: ret float [[OP_RDX2]] ; entry: %mul = mul nsw i32 %b, %a @@ -836,29 +808,29 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 -; CHECK-NEXT: [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00 -; CHECK-NEXT: [[OP_EXTRA3:%.*]] = fadd fast float [[OP_EXTRA2]], [[CONV]] -; CHECK-NEXT: ret float [[OP_EXTRA3]] +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], 5.000000e+00 +; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 5.000000e+00 +; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00 +; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[CONV]] +; CHECK-NEXT: [[OP_RDX4:%.*]] = fadd fast float [[OP_RDX3]], [[CONV]] +; CHECK-NEXT: ret float [[OP_RDX4]] ; ; THRESHOLD-LABEL: @extra_args_same_several_times( ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) -; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] -; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 -; THRESHOLD-NEXT: [[OP_EXTRA2:%.*]] = fadd fast float [[OP_EXTRA1]], 5.000000e+00 -; THRESHOLD-NEXT: [[OP_EXTRA3:%.*]] = fadd fast float [[OP_EXTRA2]], [[CONV]] -; THRESHOLD-NEXT: ret float [[OP_EXTRA3]] +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], 5.000000e+00 +; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], 5.000000e+00 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], 3.000000e+00 +; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[CONV]] +; THRESHOLD-NEXT: [[OP_RDX4:%.*]] = fadd fast float [[OP_RDX3]], [[CONV]] +; THRESHOLD-NEXT: ret float [[OP_RDX4]] ; entry: %mul = mul nsw i32 %b, %a @@ -899,28 +871,28 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float ; CHECK-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float -; CHECK-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; CHECK-NEXT: ret float [[OP_EXTRA1]] +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], 3.000000e+00 +; CHECK-NEXT: ret float [[OP_RDX3]] ; ; THRESHOLD-LABEL: @extra_args_no_replace( ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]] ; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float ; THRESHOLD-NEXT: [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float -; THRESHOLD-NEXT: [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00 -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]] ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) -; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] -; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; THRESHOLD-NEXT: ret float [[OP_EXTRA1]] +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]] +; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]] +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[CONVC]] +; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], 3.000000e+00 +; THRESHOLD-NEXT: ret float [[OP_RDX3]] ; entry: %mul = mul nsw i32 %b, %a @@ -1019,9 +991,9 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]] -; CHECK-NEXT: [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]] -; CHECK-NEXT: ret i32 [[OP_EXTRA2]] +; CHECK-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = add nuw i32 [[OP_RDX]], [[ARG]] +; CHECK-NEXT: ret i32 [[OP_RDX2]] ; ; THRESHOLD-LABEL: @wobble( ; THRESHOLD-NEXT: bb: @@ -1034,9 +1006,9 @@ define i32 @wobble(i32 %arg, i32 %bar) { ; THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer ; THRESHOLD-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> ; THRESHOLD-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) -; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]] -; THRESHOLD-NEXT: [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]] -; THRESHOLD-NEXT: ret i32 [[OP_EXTRA2]] +; THRESHOLD-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[TMP6]], [[TMP3]] +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = add nuw i32 [[OP_RDX]], [[ARG]] +; THRESHOLD-NEXT: ret i32 [[OP_RDX2]] ; bb: %x1 = xor i32 %arg, %bar diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll index ef45de7..470fa0c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -440,23 +440,11 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; ; MINTREESIZE-LABEL: @reschedule_extract( ; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]] +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -490,23 +478,11 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { ; ; MINTREESIZE-LABEL: @take_credit( ; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]] +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -564,39 +540,11 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr ; ; MINTREESIZE-LABEL: @_vadd256( ; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[B]], i32 6 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[B]], i32 5 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[B]], i32 4 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[B]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[A]], i32 6 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[A]], i32 5 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = extractelement <8 x float> [[A]], i32 4 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[A]], i32 3 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = extractelement <8 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = extractelement <8 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP16]], i32 0 -; MINTREESIZE-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP8]], i32 1 -; MINTREESIZE-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0 -; MINTREESIZE-NEXT: [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP7]], i32 1 -; MINTREESIZE-NEXT: [[TMP21:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i32 0 -; MINTREESIZE-NEXT: [[TMP22:%.*]] = insertelement <2 x float> [[TMP21]], float [[TMP6]], i32 1 -; MINTREESIZE-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 -; MINTREESIZE-NEXT: [[TMP24:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP5]], i32 1 -; MINTREESIZE-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0 -; MINTREESIZE-NEXT: [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0 -; MINTREESIZE-NEXT: [[TMP28:%.*]] = insertelement <2 x float> [[TMP27]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i32 0 -; MINTREESIZE-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP31:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i32 0 -; MINTREESIZE-NEXT: [[TMP32:%.*]] = insertelement <2 x float> [[TMP31]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP33:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP33]] +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index e42bc3b..d7adf19 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -475,23 +475,11 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; ; MINTREESIZE-LABEL: @reschedule_extract( ; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]] +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -525,23 +513,11 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { ; ; MINTREESIZE-LABEL: @take_credit( ; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i32 0 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = fadd <4 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <4 x float> [[TMP17]] +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]] ; %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 @@ -599,39 +575,11 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr ; ; MINTREESIZE-LABEL: @_vadd256( ; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[B]], i32 6 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[B]], i32 5 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[B]], i32 4 -; MINTREESIZE-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[B]], i32 3 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[B]], i32 2 -; MINTREESIZE-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[B]], i32 1 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[B]], i32 0 -; MINTREESIZE-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 -; MINTREESIZE-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[A]], i32 6 -; MINTREESIZE-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[A]], i32 5 -; MINTREESIZE-NEXT: [[TMP12:%.*]] = extractelement <8 x float> [[A]], i32 4 -; MINTREESIZE-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[A]], i32 3 -; MINTREESIZE-NEXT: [[TMP14:%.*]] = extractelement <8 x float> [[A]], i32 2 -; MINTREESIZE-NEXT: [[TMP15:%.*]] = extractelement <8 x float> [[A]], i32 1 -; MINTREESIZE-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[A]], i32 0 -; MINTREESIZE-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP16]], i32 0 -; MINTREESIZE-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP8]], i32 1 -; MINTREESIZE-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0 -; MINTREESIZE-NEXT: [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP7]], i32 1 -; MINTREESIZE-NEXT: [[TMP21:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i32 0 -; MINTREESIZE-NEXT: [[TMP22:%.*]] = insertelement <2 x float> [[TMP21]], float [[TMP6]], i32 1 -; MINTREESIZE-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 -; MINTREESIZE-NEXT: [[TMP24:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP5]], i32 1 -; MINTREESIZE-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0 -; MINTREESIZE-NEXT: [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP4]], i32 1 -; MINTREESIZE-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0 -; MINTREESIZE-NEXT: [[TMP28:%.*]] = insertelement <2 x float> [[TMP27]], float [[TMP3]], i32 1 -; MINTREESIZE-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i32 0 -; MINTREESIZE-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP2]], i32 1 -; MINTREESIZE-NEXT: [[TMP31:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i32 0 -; MINTREESIZE-NEXT: [[TMP32:%.*]] = insertelement <2 x float> [[TMP31]], float [[TMP1]], i32 1 -; MINTREESIZE-NEXT: [[TMP33:%.*]] = fadd <8 x float> [[A]], [[B]] -; MINTREESIZE-NEXT: ret <8 x float> [[TMP33]] +; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]] +; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]] ; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %b, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll index 0cfd620..4aa4ce1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll @@ -12,23 +12,25 @@ define void @test() #0 { ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP18:%.*]], [[BB1]] ], [ undef, [[BB]] ] -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 undef, [[TMP]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], [[TMP]] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP]] -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP]] -; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP]] -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP]] -; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP]] -; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], [[TMP]] -; CHECK-NEXT: [[TMP18]] = mul i32 [[TMP17]], [[TMP]] +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[BB1]] ], [ undef, [[BB]] ] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[TMP]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[TMP]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[TMP]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[TMP]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[TMP]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[TMP]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[TMP]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[TMP]], i32 7 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[TMP]], i32 8 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[TMP]], i32 9 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[TMP]], i32 10 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[TMP]], i32 11 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[TMP]], i32 12 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[TMP]], i32 13 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[TMP]], i32 14 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[TMP]], i32 15 +; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP15]]) +; CHECK-NEXT: [[OP_RDX]] = mul i32 [[TMP16]], undef ; CHECK-NEXT: br label [[BB1]] ; bb: @@ -63,23 +65,19 @@ define void @test_2(i8 addrspace(1)* %arg, i32 %arg1) #0 { ; CHECK: bb2: ; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB:%.*]] ], [ undef, [[BB2]] ] ; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, [[BB]] ], [ undef, [[BB2]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP]], undef -; CHECK-NEXT: [[TMP5:%.*]] = add i32 undef, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = add i32 undef, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = add i32 undef, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = add i32 undef, [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = add i32 undef, [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i32 undef, [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = add i32 undef, [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = add i32 undef, [[TMP18]] -; CHECK-NEXT: call void @use(i32 [[TMP19]]) +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP]], i32 7 +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], undef +; CHECK-NEXT: call void @use(i32 [[OP_RDX1]]) ; CHECK-NEXT: br label [[BB2]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll index 90a259f..472ec76 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -10,29 +10,28 @@ define i32 @bar() local_unnamed_addr { ; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef ; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_3]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[SUB102_1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD94_1]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[ADD78_1]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[SUB86_1]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[ADD78_2]], i32 5 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD78_1]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD94_1]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_1]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SUB102_3]], i32 4 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 4> -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP12:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 21, i32 22, i32 7, i32 24, i32 25, i32 10, i32 27, i32 28, i32 13, i32 30, i32 31> -; CHECK-NEXT: [[TMP14:%.*]] = lshr <16 x i32> [[TMP13]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> -; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i32> [[TMP14]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537> -; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <16 x i32> [[TMP15]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> -; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i32> [[TMP16]], [[TMP13]] -; CHECK-NEXT: [[TMP18:%.*]] = xor <16 x i32> [[TMP17]], [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP18]]) -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP19]], 16 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 4 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 4> +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31> +; CHECK-NEXT: [[TMP13:%.*]] = lshr <16 x i32> [[TMP12]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> +; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i32> [[TMP13]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537> +; CHECK-NEXT: [[TMP15:%.*]] = mul nuw <16 x i32> [[TMP14]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> +; CHECK-NEXT: [[TMP16:%.*]] = add <16 x i32> [[TMP15]], [[TMP12]] +; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP17]]) +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP18]], 16 ; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]] ; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 ; CHECK-NEXT: ret i32 [[SHR120]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll new file mode 100644 index 0000000..aa9d898 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -opaque-pointers -mcpu=x86-64 -S | FileCheck %s --check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -opaque-pointers -mcpu=x86-64-v2 -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -opaque-pointers -mcpu=x86-64-v3 -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -opaque-pointers -mcpu=x86-64-v4 -S | FileCheck %s --check-prefixes=AVX + +define { i64, i64 } @compute_min(ptr nocapture noundef nonnull readonly align 2 dereferenceable(16) %x, ptr nocapture noundef nonnull readonly align 2 dereferenceable(16) %y) { +; SSE-LABEL: @compute_min( +; SSE-NEXT: entry: +; SSE-NEXT: [[TMP0:%.*]] = load i16, ptr [[Y:%.*]], align 2 +; SSE-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 2 +; SSE-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP0]], i16 [[TMP1]]) +; SSE-NEXT: [[ARRAYIDX_I_I_1:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 1 +; SSE-NEXT: [[ARRAYIDX_I_I10_1:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 1 +; SSE-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_1]], align 2 +; SSE-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_I_I_1]], align 2 +; SSE-NEXT: [[TMP5:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP3]], i16 [[TMP4]]) +; SSE-NEXT: [[ARRAYIDX_I_I_2:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 2 +; SSE-NEXT: [[ARRAYIDX_I_I10_2:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 2 +; SSE-NEXT: [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_2]], align 2 +; SSE-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_I_I_2]], align 2 +; SSE-NEXT: [[TMP8:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP6]], i16 [[TMP7]]) +; SSE-NEXT: [[ARRAYIDX_I_I_3:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 3 +; SSE-NEXT: [[ARRAYIDX_I_I10_3:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 3 +; SSE-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_3]], align 2 +; SSE-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX_I_I_3]], align 2 +; SSE-NEXT: [[TMP11:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP9]], i16 [[TMP10]]) +; SSE-NEXT: [[ARRAYIDX_I_I_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 4 +; SSE-NEXT: [[ARRAYIDX_I_I10_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 4 +; SSE-NEXT: [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_4]], align 2 +; SSE-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX_I_I_4]], align 2 +; SSE-NEXT: [[TMP14:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP12]], i16 [[TMP13]]) +; SSE-NEXT: [[ARRAYIDX_I_I_5:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 5 +; SSE-NEXT: [[ARRAYIDX_I_I10_5:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 5 +; SSE-NEXT: [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_5]], align 2 +; SSE-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX_I_I_5]], align 2 +; SSE-NEXT: [[TMP17:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP15]], i16 [[TMP16]]) +; SSE-NEXT: [[ARRAYIDX_I_I_6:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 6 +; SSE-NEXT: [[ARRAYIDX_I_I10_6:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 6 +; SSE-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_6]], align 2 +; SSE-NEXT: [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX_I_I_6]], align 2 +; SSE-NEXT: [[TMP20:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP18]], i16 [[TMP19]]) +; SSE-NEXT: [[ARRAYIDX_I_I_7:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 7 +; SSE-NEXT: [[ARRAYIDX_I_I10_7:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 7 +; SSE-NEXT: [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_7]], align 2 +; SSE-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX_I_I_7]], align 2 +; SSE-NEXT: [[TMP23:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP21]], i16 [[TMP22]]) +; SSE-NEXT: [[RETVAL_SROA_4_0_INSERT_EXT:%.*]] = zext i16 [[TMP11]] to i64 +; SSE-NEXT: [[RETVAL_SROA_4_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[RETVAL_SROA_4_0_INSERT_EXT]], 48 +; SSE-NEXT: [[RETVAL_SROA_3_0_INSERT_EXT:%.*]] = zext i16 [[TMP8]] to i64 +; SSE-NEXT: [[RETVAL_SROA_3_0_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_3_0_INSERT_EXT]], 32 +; SSE-NEXT: [[RETVAL_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_4_0_INSERT_SHIFT]], [[RETVAL_SROA_3_0_INSERT_SHIFT]] +; SSE-NEXT: [[RETVAL_SROA_2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i64 +; SSE-NEXT: [[RETVAL_SROA_2_0_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_2_0_INSERT_EXT]], 16 +; SSE-NEXT: [[RETVAL_SROA_2_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_3_0_INSERT_INSERT]], [[RETVAL_SROA_2_0_INSERT_SHIFT]] +; SSE-NEXT: [[RETVAL_SROA_0_0_INSERT_EXT:%.*]] = zext i16 [[TMP2]] to i64 +; SSE-NEXT: [[RETVAL_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_2_0_INSERT_INSERT]], [[RETVAL_SROA_0_0_INSERT_EXT]] +; SSE-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, i64 } poison, i64 [[RETVAL_SROA_0_0_INSERT_INSERT]], 0 +; SSE-NEXT: [[RETVAL_SROA_9_8_INSERT_EXT:%.*]] = zext i16 [[TMP23]] to i64 +; SSE-NEXT: [[RETVAL_SROA_9_8_INSERT_SHIFT:%.*]] = shl nuw i64 [[RETVAL_SROA_9_8_INSERT_EXT]], 48 +; SSE-NEXT: [[RETVAL_SROA_8_8_INSERT_EXT:%.*]] = zext i16 [[TMP20]] to i64 +; SSE-NEXT: [[RETVAL_SROA_8_8_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_8_8_INSERT_EXT]], 32 +; SSE-NEXT: [[RETVAL_SROA_8_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_9_8_INSERT_SHIFT]], [[RETVAL_SROA_8_8_INSERT_SHIFT]] +; SSE-NEXT: [[RETVAL_SROA_7_8_INSERT_EXT:%.*]] = zext i16 [[TMP17]] to i64 +; SSE-NEXT: [[RETVAL_SROA_7_8_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_7_8_INSERT_EXT]], 16 +; SSE-NEXT: [[RETVAL_SROA_7_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_8_8_INSERT_INSERT]], [[RETVAL_SROA_7_8_INSERT_SHIFT]] +; SSE-NEXT: [[RETVAL_SROA_5_8_INSERT_EXT:%.*]] = zext i16 [[TMP14]] to i64 +; SSE-NEXT: [[RETVAL_SROA_5_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_7_8_INSERT_INSERT]], [[RETVAL_SROA_5_8_INSERT_EXT]] +; SSE-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, i64 } [[DOTFCA_0_INSERT]], i64 [[RETVAL_SROA_5_8_INSERT_INSERT]], 1 +; SSE-NEXT: ret { i64, i64 } [[DOTFCA_1_INSERT]] +; +; AVX-LABEL: @compute_min( +; AVX-NEXT: entry: +; AVX-NEXT: [[TMP0:%.*]] = load i16, ptr [[Y:%.*]], align 2 +; AVX-NEXT: [[TMP1:%.*]] = load i16, ptr [[X:%.*]], align 2 +; AVX-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP0]], i16 [[TMP1]]) +; AVX-NEXT: [[ARRAYIDX_I_I_1:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 1 +; AVX-NEXT: [[ARRAYIDX_I_I10_1:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 1 +; AVX-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_1]], align 2 +; AVX-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_I_I_1]], align 2 +; AVX-NEXT: [[TMP5:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP3]], i16 [[TMP4]]) +; AVX-NEXT: [[ARRAYIDX_I_I_2:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 2 +; AVX-NEXT: [[ARRAYIDX_I_I10_2:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 2 +; AVX-NEXT: [[ARRAYIDX_I_I_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 4 +; AVX-NEXT: [[ARRAYIDX_I_I10_4:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 4 +; AVX-NEXT: [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_4]], align 2 +; AVX-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_I_I_4]], align 2 +; AVX-NEXT: [[TMP8:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP6]], i16 [[TMP7]]) +; AVX-NEXT: [[ARRAYIDX_I_I_5:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 5 +; AVX-NEXT: [[ARRAYIDX_I_I10_5:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 5 +; AVX-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX_I_I10_5]], align 2 +; AVX-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX_I_I_5]], align 2 +; AVX-NEXT: [[TMP11:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP9]], i16 [[TMP10]]) +; AVX-NEXT: [[ARRAYIDX_I_I_6:%.*]] = getelementptr inbounds [8 x i16], ptr [[X]], i64 0, i64 6 +; AVX-NEXT: [[ARRAYIDX_I_I10_6:%.*]] = getelementptr inbounds [8 x i16], ptr [[Y]], i64 0, i64 6 +; AVX-NEXT: [[TMP12:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_I_I10_2]], align 2 +; AVX-NEXT: [[TMP13:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_I_I_2]], align 2 +; AVX-NEXT: [[TMP14:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP12]], <2 x i16> [[TMP13]]) +; AVX-NEXT: [[TMP15:%.*]] = zext <2 x i16> [[TMP14]] to <2 x i64> +; AVX-NEXT: [[TMP16:%.*]] = shl nuw <2 x i64> [[TMP15]], <i64 32, i64 48> +; AVX-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP16]], i32 0 +; AVX-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP16]], i32 1 +; AVX-NEXT: [[RETVAL_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[TMP18]], [[TMP17]] +; AVX-NEXT: [[RETVAL_SROA_2_0_INSERT_EXT:%.*]] = zext i16 [[TMP5]] to i64 +; AVX-NEXT: [[RETVAL_SROA_2_0_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_2_0_INSERT_EXT]], 16 +; AVX-NEXT: [[RETVAL_SROA_2_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_3_0_INSERT_INSERT]], [[RETVAL_SROA_2_0_INSERT_SHIFT]] +; AVX-NEXT: [[RETVAL_SROA_0_0_INSERT_EXT:%.*]] = zext i16 [[TMP2]] to i64 +; AVX-NEXT: [[RETVAL_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_2_0_INSERT_INSERT]], [[RETVAL_SROA_0_0_INSERT_EXT]] +; AVX-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, i64 } poison, i64 [[RETVAL_SROA_0_0_INSERT_INSERT]], 0 +; AVX-NEXT: [[TMP19:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_I_I10_6]], align 2 +; AVX-NEXT: [[TMP20:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_I_I_6]], align 2 +; AVX-NEXT: [[TMP21:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP19]], <2 x i16> [[TMP20]]) +; AVX-NEXT: [[TMP22:%.*]] = zext <2 x i16> [[TMP21]] to <2 x i64> +; AVX-NEXT: [[TMP23:%.*]] = shl nuw <2 x i64> [[TMP22]], <i64 32, i64 48> +; AVX-NEXT: [[TMP24:%.*]] = extractelement <2 x i64> [[TMP23]], i32 0 +; AVX-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[TMP23]], i32 1 +; AVX-NEXT: [[RETVAL_SROA_8_8_INSERT_INSERT:%.*]] = or i64 [[TMP25]], [[TMP24]] +; AVX-NEXT: [[RETVAL_SROA_7_8_INSERT_EXT:%.*]] = zext i16 [[TMP11]] to i64 +; AVX-NEXT: [[RETVAL_SROA_7_8_INSERT_SHIFT:%.*]] = shl nuw nsw i64 [[RETVAL_SROA_7_8_INSERT_EXT]], 16 +; AVX-NEXT: [[RETVAL_SROA_7_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_8_8_INSERT_INSERT]], [[RETVAL_SROA_7_8_INSERT_SHIFT]] +; AVX-NEXT: [[RETVAL_SROA_5_8_INSERT_EXT:%.*]] = zext i16 [[TMP8]] to i64 +; AVX-NEXT: [[RETVAL_SROA_5_8_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_7_8_INSERT_INSERT]], [[RETVAL_SROA_5_8_INSERT_EXT]] +; AVX-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, i64 } [[DOTFCA_0_INSERT]], i64 [[RETVAL_SROA_5_8_INSERT_INSERT]], 1 +; AVX-NEXT: ret { i64, i64 } [[DOTFCA_1_INSERT]] +; +entry: + %0 = load i16, ptr %y, align 2 + %1 = load i16, ptr %x, align 2 + %2 = tail call i16 @llvm.smin.i16(i16 %0, i16 %1) + %arrayidx.i.i.1 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 1 + %arrayidx.i.i10.1 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 1 + %3 = load i16, ptr %arrayidx.i.i10.1, align 2 + %4 = load i16, ptr %arrayidx.i.i.1, align 2 + %5 = tail call i16 @llvm.smin.i16(i16 %3, i16 %4) + %arrayidx.i.i.2 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 2 + %arrayidx.i.i10.2 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 2 + %6 = load i16, ptr %arrayidx.i.i10.2, align 2 + %7 = load i16, ptr %arrayidx.i.i.2, align 2 + %8 = tail call i16 @llvm.smin.i16(i16 %6, i16 %7) + %arrayidx.i.i.3 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 3 + %arrayidx.i.i10.3 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 3 + %9 = load i16, ptr %arrayidx.i.i10.3, align 2 + %10 = load i16, ptr %arrayidx.i.i.3, align 2 + %11 = tail call i16 @llvm.smin.i16(i16 %9, i16 %10) + %arrayidx.i.i.4 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 4 + %arrayidx.i.i10.4 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 4 + %12 = load i16, ptr %arrayidx.i.i10.4, align 2 + %13 = load i16, ptr %arrayidx.i.i.4, align 2 + %14 = tail call i16 @llvm.smin.i16(i16 %12, i16 %13) + %arrayidx.i.i.5 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 5 + %arrayidx.i.i10.5 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 5 + %15 = load i16, ptr %arrayidx.i.i10.5, align 2 + %16 = load i16, ptr %arrayidx.i.i.5, align 2 + %17 = tail call i16 @llvm.smin.i16(i16 %15, i16 %16) + %arrayidx.i.i.6 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 6 + %arrayidx.i.i10.6 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 6 + %18 = load i16, ptr %arrayidx.i.i10.6, align 2 + %19 = load i16, ptr %arrayidx.i.i.6, align 2 + %20 = tail call i16 @llvm.smin.i16(i16 %18, i16 %19) + %arrayidx.i.i.7 = getelementptr inbounds [8 x i16], ptr %x, i64 0, i64 7 + %arrayidx.i.i10.7 = getelementptr inbounds [8 x i16], ptr %y, i64 0, i64 7 + %21 = load i16, ptr %arrayidx.i.i10.7, align 2 + %22 = load i16, ptr %arrayidx.i.i.7, align 2 + %23 = tail call i16 @llvm.smin.i16(i16 %21, i16 %22) + %retval.sroa.4.0.insert.ext = zext i16 %11 to i64 + %retval.sroa.4.0.insert.shift = shl nuw i64 %retval.sroa.4.0.insert.ext, 48 + %retval.sroa.3.0.insert.ext = zext i16 %8 to i64 + %retval.sroa.3.0.insert.shift = shl nuw nsw i64 %retval.sroa.3.0.insert.ext, 32 + %retval.sroa.3.0.insert.insert = or i64 %retval.sroa.4.0.insert.shift, %retval.sroa.3.0.insert.shift + %retval.sroa.2.0.insert.ext = zext i16 %5 to i64 + %retval.sroa.2.0.insert.shift = shl nuw nsw i64 %retval.sroa.2.0.insert.ext, 16 + %retval.sroa.2.0.insert.insert = or i64 %retval.sroa.3.0.insert.insert, %retval.sroa.2.0.insert.shift + %retval.sroa.0.0.insert.ext = zext i16 %2 to i64 + %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.2.0.insert.insert, %retval.sroa.0.0.insert.ext + %.fca.0.insert = insertvalue { i64, i64 } poison, i64 %retval.sroa.0.0.insert.insert, 0 + %retval.sroa.9.8.insert.ext = zext i16 %23 to i64 + %retval.sroa.9.8.insert.shift = shl nuw i64 %retval.sroa.9.8.insert.ext, 48 + %retval.sroa.8.8.insert.ext = zext i16 %20 to i64 + %retval.sroa.8.8.insert.shift = shl nuw nsw i64 %retval.sroa.8.8.insert.ext, 32 + %retval.sroa.8.8.insert.insert = or i64 %retval.sroa.9.8.insert.shift, %retval.sroa.8.8.insert.shift + %retval.sroa.7.8.insert.ext = zext i16 %17 to i64 + %retval.sroa.7.8.insert.shift = shl nuw nsw i64 %retval.sroa.7.8.insert.ext, 16 + %retval.sroa.7.8.insert.insert = or i64 %retval.sroa.8.8.insert.insert, %retval.sroa.7.8.insert.shift + %retval.sroa.5.8.insert.ext = zext i16 %14 to i64 + %retval.sroa.5.8.insert.insert = or i64 %retval.sroa.7.8.insert.insert, %retval.sroa.5.8.insert.ext + %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.5.8.insert.insert, 1 + ret { i64, i64 } %.fca.1.insert +} +declare i16 @llvm.smin.i16(i16, i16) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 460f222..5769a29 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -230,17 +230,12 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17> -; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: [[S4:%.*]] = select i1 [[TMP4]], i1 [[TMP5]], i1 false -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[TMP6]], i1 false -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[TMP7]], i1 false -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[TMP8]], i1 false -; CHECK-NEXT: ret i1 [[S7]] +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP4]], i1 [[TMP6]], i1 false +; CHECK-NEXT: ret i1 [[OP_RDX]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -265,53 +260,17 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) { } define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) { -; SSE-LABEL: @logical_and_icmp_clamp_extra_use_cmp( -; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42 -; SSE-NEXT: call void @use1(i1 [[C2]]) -; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X0]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X1]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X3]], i32 2 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X0]], i32 3 -; SSE-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17> -; SSE-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17> -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> -; SSE-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17 -; SSE-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17 -; SSE-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17 -; SSE-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]] -; SSE-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]]) -; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false -; SSE-NEXT: [[S5:%.*]] = select i1 [[OP_EXTRA]], i1 [[D1]], i1 false -; SSE-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false -; SSE-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false -; SSE-NEXT: ret i1 [[S7]] -; -; AVX-LABEL: @logical_and_icmp_clamp_extra_use_cmp( -; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; AVX-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42 -; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42 -; AVX-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42 -; AVX-NEXT: call void @use1(i1 [[C2]]) -; AVX-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 42 -; AVX-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17 -; AVX-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17 -; AVX-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17 -; AVX-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17 -; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false -; AVX-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false -; AVX-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false -; AVX-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false -; AVX-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false -; AVX-NEXT: ret i1 [[S7]] +; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: call void @use1(i1 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17> +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP5]], i1 [[TMP7]], i1 false +; CHECK-NEXT: ret i1 [[OP_RDX]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -338,27 +297,20 @@ define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) { define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42 -; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 42 -; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17 -; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17 -; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17 -; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP5]], i1 false ; CHECK-NEXT: call void @use1(i1 [[S2]]) -; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false -; CHECK-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false -; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false -; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false -; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false -; CHECK-NEXT: ret i1 [[S7]] +; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP8]], i1 false +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i1 [[S2]], i1 false +; CHECK-NEXT: ret i1 [[OP_RDX1]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -393,11 +345,11 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { ; CHECK-NEXT: [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1 ; CHECK-NEXT: [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2 ; CHECK-NEXT: [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[X0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[X1]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[X1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[X0]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[X2]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[X3]], i32 3 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 2, i32 3> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y1]], i32 5 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[Y2]], i32 6 @@ -435,46 +387,37 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) { ; SSE-LABEL: @logical_and_icmp_clamp_partial( -; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X0]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X1]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X2]], i32 2 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X0]], i32 3 -; SSE-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17> -; SSE-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17> -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> -; SSE-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17 -; SSE-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17 -; SSE-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17 +; SSE-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 +; SSE-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 1 +; SSE-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 0 +; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 +; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1 +; SSE-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[TMP5]], <i32 42, i32 42> +; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42 +; SSE-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17> ; SSE-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]] ; SSE-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]]) -; SSE-NEXT: [[S5:%.*]] = select i1 [[TMP9]], i1 [[D1]], i1 false -; SSE-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false -; SSE-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false -; SSE-NEXT: ret i1 [[S7]] +; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 +; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP9]], i1 [[TMP10]], i1 false +; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1 +; SSE-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i1 [[TMP11]], i1 false +; SSE-NEXT: [[OP_RDX2:%.*]] = select i1 [[OP_RDX1]], i1 [[C2]], i1 false +; SSE-NEXT: ret i1 [[OP_RDX2]] ; ; AVX-LABEL: @logical_and_icmp_clamp_partial( -; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; AVX-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42 -; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42 -; AVX-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42 -; AVX-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17 -; AVX-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17 -; AVX-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17 -; AVX-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17 -; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; AVX-NEXT: [[S4:%.*]] = select i1 [[S2]], i1 [[D0]], i1 false -; AVX-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false -; AVX-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false -; AVX-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false -; AVX-NEXT: ret i1 [[S7]] +; AVX-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 +; AVX-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 1 +; AVX-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 0 +; AVX-NEXT: [[C0:%.*]] = icmp slt i32 [[TMP3]], 42 +; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[TMP2]], 42 +; AVX-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42 +; AVX-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17> +; AVX-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] +; AVX-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) +; AVX-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[C1]], i1 false +; AVX-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i1 [[C0]], i1 false +; AVX-NEXT: [[OP_RDX2:%.*]] = select i1 [[OP_RDX1]], i1 [[C2]], i1 false +; AVX-NEXT: ret i1 [[OP_RDX2]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -499,44 +442,17 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) { } define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) { -; SSE-LABEL: @logical_and_icmp_clamp_pred_diff( -; SSE-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42> -; SSE-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42> -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> -; SSE-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17> -; SSE-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) -; SSE-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 -; SSE-NEXT: [[S4:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false -; SSE-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 -; SSE-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[TMP8]], i1 false -; SSE-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 -; SSE-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[TMP9]], i1 false -; SSE-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 -; SSE-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[TMP10]], i1 false -; SSE-NEXT: ret i1 [[S7]] -; -; AVX-LABEL: @logical_and_icmp_clamp_pred_diff( -; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; AVX-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42 -; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42 -; AVX-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42 -; AVX-NEXT: [[C3:%.*]] = icmp ult i32 [[X3]], 42 -; AVX-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17 -; AVX-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17 -; AVX-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17 -; AVX-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17 -; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false -; AVX-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false -; AVX-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false -; AVX-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false -; AVX-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false -; AVX-NEXT: ret i1 [[S7]] +; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17> +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP7]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP8]], i1 false +; CHECK-NEXT: ret i1 [[OP_RDX]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -566,8 +482,8 @@ define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) { ; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 [[C]], i1 false ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i1 [[S3]], i1 false -; CHECK-NEXT: ret i1 [[OP_EXTRA]] +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[S3]], i1 false +; CHECK-NEXT: ret i1 [[OP_RDX]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -595,8 +511,8 @@ define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) { ; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 true, i1 [[C]] ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i1 true, i1 [[S3]] -; CHECK-NEXT: ret i1 [[OP_EXTRA]] +; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 true, i1 [[S3]] +; CHECK-NEXT: ret i1 [[OP_RDX]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll index 6bee995..1fd2f30 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll @@ -8,20 +8,21 @@ define i64 @test() { ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 0, [[BB2:%.*]] ], [ 0, [[BB1:%.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 0, [[BB1]] ] -; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP4]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP4]] -; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP4]] -; CHECK-NEXT: [[TMP65:%.*]] = sext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ poison, [[BB2:%.*]] ], [ zeroinitializer, [[BB1:%.*]] ] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1> +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP1]], i32 5 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP1]], i32 6 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP1]], i32 7 +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP65:%.*]] = sext i32 [[OP_RDX]] to i64 ; CHECK-NEXT: ret i64 [[TMP65]] ; bb1: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll index 362967f..0fac440 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll @@ -24,15 +24,15 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42> ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) -; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]] +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[SUM]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 [[OP_EXTRA]] +; CHECK-NEXT: ret i32 [[OP_RDX]] ; entry: %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1 @@ -97,17 +97,17 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) -; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]] +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP5]], [[SUM]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 [[OP_EXTRA]] +; CHECK-NEXT: ret i32 [[OP_RDX]] ; entry: %arrayidx.p.1 = getelementptr inbounds i32, i32* %p, i64 1 @@ -188,18 +188,18 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q:%.*]] to <8 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[SHUFFLE]], [[TMP3]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) -; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]] +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP5]], [[SUM]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 [[OP_EXTRA]] +; CHECK-NEXT: ret i32 [[OP_RDX]] ; entry: %arrayidx.p.1 = getelementptr inbounds i32, i32* %p, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll index 28417c0..7e0dc28 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll @@ -11,9 +11,9 @@ ; } ; Vector cost is 5, Scalar cost is 7 -; AVX: Adding cost -2 for reduction that starts with %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction) +; AVX: Adding cost -2 for reduction that starts with %0 = load i32, i32* %p, align 4 (It is a splitting reduction) ; Vector cost is 6, Scalar cost is 7 -; SSE: Adding cost -1 for reduction that starts with %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction) +; SSE: Adding cost -1 for reduction that starts with %0 = load i32, i32* %p, align 4 (It is a splitting reduction) define i32 @test_add(i32* nocapture readonly %p) { ; CHECK-LABEL: @test_add( ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index 0e5d3bc..915fe01 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -11,31 +11,26 @@ define void @hoge() { ; CHECK-NEXT: ret void ; CHECK: bb2: ; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 undef>, i16 [[T]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 undef, i16 poison>, i16 [[T]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> <i32 undef, i32 63>, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> <i32 63, i32 undef>, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef -; CHECK-NEXT: [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE10]], <i32 undef, i32 15, i32 31, i32 47> +; CHECK-NEXT: [[SHUFFLE4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 1> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE4]], <i32 15, i32 undef, i32 31, i32 47> ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[TMP5]], i32 undef ; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63 ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <2 x i32> undef, [[TMP1]] ; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP6]], undef ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -49, i32 -33, i32 -33, i32 -17> -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP9]], undef -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP9]], i32 undef -; CHECK-NEXT: [[OP_EXTRA2:%.*]] = icmp slt i32 [[OP_EXTRA1]], undef -; CHECK-NEXT: [[OP_EXTRA3:%.*]] = select i1 [[OP_EXTRA2]], i32 [[OP_EXTRA1]], i32 undef -; CHECK-NEXT: [[OP_EXTRA4:%.*]] = icmp slt i32 [[OP_EXTRA3]], undef -; CHECK-NEXT: [[OP_EXTRA5:%.*]] = select i1 [[OP_EXTRA4]], i32 [[OP_EXTRA3]], i32 undef -; CHECK-NEXT: [[OP_EXTRA6:%.*]] = icmp slt i32 [[OP_EXTRA5]], undef -; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[OP_EXTRA6]], i32 [[OP_EXTRA5]], i32 undef -; CHECK-NEXT: [[OP_EXTRA8:%.*]] = icmp slt i32 [[OP_EXTRA7]], undef -; CHECK-NEXT: [[OP_EXTRA9:%.*]] = select i1 [[OP_EXTRA8]], i32 [[OP_EXTRA7]], i32 undef -; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA9]] +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -17, i32 -33, i32 -33, i32 -49> +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP10]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = icmp slt i32 [[OP_RDX1]], undef +; CHECK-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[OP_RDX1]], i32 undef +; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX3]] ; CHECK-NEXT: unreachable ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll index a9a6aa9..e46169a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll @@ -21,19 +21,19 @@ define void @test() { ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[I]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 8 -; CHECK-NEXT: [[I5:%.*]] = add i32 undef, undef ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) -; CHECK-NEXT: [[OP_EXTRA2:%.*]] = add i32 [[TMP2]], [[I5]] -; CHECK-NEXT: [[I10:%.*]] = add i32 [[OP_EXTRA2]], undef -; CHECK-NEXT: [[I11:%.*]] = add i32 [[OP_EXTRA2]], [[I10]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = add i32 [[TMP2]], undef +; CHECK-NEXT: [[OP_RDX4:%.*]] = add i32 [[OP_RDX3]], undef +; CHECK-NEXT: [[I10:%.*]] = add i32 [[OP_RDX4]], undef +; CHECK-NEXT: [[I11:%.*]] = add i32 [[OP_RDX4]], [[I10]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[I1]] to <4 x i32>* ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 -; CHECK-NEXT: [[I12:%.*]] = add i32 undef, undef ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP5]], [[I12]] -; CHECK-NEXT: [[OP_EXTRA1:%.*]] = add i32 [[OP_EXTRA]], undef -; CHECK-NEXT: [[I18:%.*]] = add i32 [[OP_EXTRA1]], [[I11]] -; CHECK-NEXT: [[I19:%.*]] = add i32 [[OP_EXTRA1]], [[I18]] +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP5]], undef +; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], undef +; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[OP_RDX1]], undef +; CHECK-NEXT: [[I18:%.*]] = add i32 [[OP_RDX2]], [[I11]] +; CHECK-NEXT: [[I19:%.*]] = add i32 [[OP_RDX2]], [[I18]] ; CHECK-NEXT: [[I20:%.*]] = add i32 undef, [[I19]] ; CHECK-NEXT: [[I21:%.*]] = add i32 undef, [[I20]] ; CHECK-NEXT: [[I22:%.*]] = add i32 undef, [[I21]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll index 9502620..654d688 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll @@ -7,8 +7,8 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0> -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4 @@ -53,9 +53,9 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 1, i32 1, i32 0, i32 2, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 2, i32 1, i32 3, i32 1, i32 1, i32 0, i32 2, i32 1> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4 @@ -104,9 +104,9 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 3, i32 2, i32 3, i32 0, i32 1, i32 0, i32 2, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A1:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 2, i32 3, i32 3, i32 0, i32 1, i32 0, i32 2, i32 1> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4 diff --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll index f48d5e2..b2ae15c 100644 --- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll @@ -43,7 +43,7 @@ declare i32 @llvm.umin.i32(i32, i32) define void @test2() { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>) ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> poison, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 77) diff --git a/llvm/test/Transforms/Scalarizer/intrinsics.ll b/llvm/test/Transforms/Scalarizer/intrinsics.ll index 896000d..4e5a212 100644 --- a/llvm/test/Transforms/Scalarizer/intrinsics.ll +++ b/llvm/test/Transforms/Scalarizer/intrinsics.ll @@ -24,6 +24,9 @@ declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) declare <2 x i32> @llvm.smul.fix.sat.v2i32(<2 x i32>, <2 x i32>, i32) declare <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32>, <2 x i32>, i32) +declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>) +declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>) + ; CHECK-LABEL: @scalarize_sqrt_v2f32( ; CHECK: %sqrt.i0 = call float @llvm.sqrt.f32(float %x.i0) @@ -134,3 +137,25 @@ define <2 x i32> @scalarize_umul_fix_sat_v2i32(<2 x i32> %x) #0 { %umulfixsat = call <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> <i32 5, i32 19>, i32 31) ret <2 x i32> %umulfixsat } + +; CHECK-LABEL: @scalarize_fptosi_sat( +; CHECK: %sat.i0 = call i32 @llvm.fptosi.sat.i32.f32(float %x.i0) +; CHECK: %sat.i1 = call i32 @llvm.fptosi.sat.i32.f32(float %x.i1) +; CHECK: %sat.upto0 = insertelement <2 x i32> poison, i32 %sat.i0, i32 0 +; CHECK: %sat = insertelement <2 x i32> %sat.upto0, i32 %sat.i1, i32 1 +; CHECK: ret <2 x i32> %sat +define <2 x i32> @scalarize_fptosi_sat(<2 x float> %x) #0 { + %sat = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %x) + ret <2 x i32> %sat +} + +; CHECK-LABEL: @scalarize_fptoui_sat( +; CHECK: %sat.i0 = call i32 @llvm.fptoui.sat.i32.f32(float %x.i0) +; CHECK: %sat.i1 = call i32 @llvm.fptoui.sat.i32.f32(float %x.i1) +; CHECK: %sat.upto0 = insertelement <2 x i32> poison, i32 %sat.i0, i32 0 +; CHECK: %sat = insertelement <2 x i32> %sat.upto0, i32 %sat.i1, i32 1 +; CHECK: ret <2 x i32> %sat +define <2 x i32> @scalarize_fptoui_sat(<2 x float> %x) #0 { + %sat = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %x) + ret <2 x i32> %sat +} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll index b135e28..831dd5f 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll @@ -6,13 +6,15 @@ declare void @some_func() define i32 @need_freeze_of_individual_or_conditions1(i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4) { ; CHECK-LABEL: @need_freeze_of_individual_or_conditions1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[COND4:%.*]], [[COND1:%.*]] -; CHECK-NEXT: [[DOTFR:%.*]] = freeze i1 [[TMP0]] -; CHECK-NEXT: br i1 [[DOTFR]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]] +; CHECK-NEXT: [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]] +; CHECK-NEXT: [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[COND4_FR]], [[COND1_FR]] +; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]] ; CHECK: entry.split: -; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[COND2:%.*]], [[COND3:%.*]] -; CHECK-NEXT: [[DOTFR2:%.*]] = freeze i1 [[TMP1]] -; CHECK-NEXT: br i1 [[DOTFR2]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]] +; CHECK-NEXT: [[COND2_FR:%.*]] = freeze i1 [[COND2:%.*]] +; CHECK-NEXT: [[COND3_FR:%.*]] = freeze i1 [[COND3:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[COND2_FR]], [[COND3_FR]] +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: @@ -50,13 +52,14 @@ exit: define i32 @need_freeze_of_individual_or_conditions2(i1 noundef %cond1, i1 %cond2, i1 %cond3, i1 %cond4) { ; CHECK-LABEL: @need_freeze_of_individual_or_conditions2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[COND4:%.*]], [[COND1:%.*]] -; CHECK-NEXT: [[DOTFR:%.*]] = freeze i1 [[TMP0]] -; CHECK-NEXT: br i1 [[DOTFR]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]] +; CHECK-NEXT: [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[COND4_FR]], [[COND1:%.*]] +; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]] ; CHECK: entry.split: -; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[COND2:%.*]], [[COND3:%.*]] -; CHECK-NEXT: [[DOTFR2:%.*]] = freeze i1 [[TMP1]] -; CHECK-NEXT: br i1 [[DOTFR2]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]] +; CHECK-NEXT: [[COND2_FR:%.*]] = freeze i1 [[COND2:%.*]] +; CHECK-NEXT: [[COND3_FR:%.*]] = freeze i1 [[COND3:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[COND2_FR]], [[COND3_FR]] +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: @@ -94,13 +97,14 @@ exit: define i32 @need_freeze_of_individual_or_conditions3(i1 %cond1, i1 %cond2, i1 %cond3, i1 noundef %cond4) { ; CHECK-LABEL: @need_freeze_of_individual_or_conditions3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[COND4:%.*]], [[COND1:%.*]] -; CHECK-NEXT: [[DOTFR:%.*]] = freeze i1 [[TMP0]] -; CHECK-NEXT: br i1 [[DOTFR]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]] +; CHECK-NEXT: [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[COND4:%.*]], [[COND1_FR]] +; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[EXIT_SPLIT:%.*]] ; CHECK: entry.split: -; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[COND2:%.*]], [[COND3:%.*]] -; CHECK-NEXT: [[DOTFR2:%.*]] = freeze i1 [[TMP1]] -; CHECK-NEXT: br i1 [[DOTFR2]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]] +; CHECK-NEXT: [[COND2_FR:%.*]] = freeze i1 [[COND2:%.*]] +; CHECK-NEXT: [[COND3_FR:%.*]] = freeze i1 [[COND3:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[COND2_FR]], [[COND3_FR]] +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[EXIT_SPLIT1:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: @@ -138,9 +142,10 @@ exit: define i32 @need_freeze_of_individual_and_conditions1(i1 %cond1, i1 %cond4) { ; CHECK-LABEL: @need_freeze_of_individual_and_conditions1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND1:%.*]] -; CHECK-NEXT: [[DOTFR:%.*]] = freeze i1 [[TMP0]] -; CHECK-NEXT: br i1 [[DOTFR]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK-NEXT: [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]] +; CHECK-NEXT: [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND4_FR]], [[COND1_FR]] +; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: @@ -172,9 +177,9 @@ exit: define i32 @need_freeze_of_individual_and_conditions2(i1 noundef %cond1, i1 %cond4) { ; CHECK-LABEL: @need_freeze_of_individual_and_conditions2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND1:%.*]] -; CHECK-NEXT: [[DOTFR:%.*]] = freeze i1 [[TMP0]] -; CHECK-NEXT: br i1 [[DOTFR]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK-NEXT: [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND4_FR]], [[COND1:%.*]] +; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: @@ -206,9 +211,9 @@ exit: define i32 @need_freeze_of_individual_and_conditions3(i1 %cond1, i1 noundef %cond4) { ; CHECK-LABEL: @need_freeze_of_individual_and_conditions3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND1:%.*]] -; CHECK-NEXT: [[DOTFR:%.*]] = freeze i1 [[TMP0]] -; CHECK-NEXT: br i1 [[DOTFR]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK-NEXT: [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND1_FR]] +; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll index f8fdc1a..3208135 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll @@ -152,11 +152,14 @@ exit: define i32 @test_partial_condition_unswitch_or_select(i32* %var, i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4, i1 %cond5, i1 %cond6) { ; CHECK-LABEL: @test_partial_condition_unswitch_or_select( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND4:%.*]], [[COND2:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = or i1 [[TMP0]], [[COND3:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[COND1:%.*]] -; CHECK-NEXT: [[DOTFR:%.*]] = freeze i1 [[TMP2]] -; CHECK-NEXT: br i1 [[DOTFR]], label [[LOOP_EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK-NEXT: [[COND4_FR:%.*]] = freeze i1 [[COND4:%.*]] +; CHECK-NEXT: [[COND2_FR:%.*]] = freeze i1 [[COND2:%.*]] +; CHECK-NEXT: [[COND3_FR:%.*]] = freeze i1 [[COND3:%.*]] +; CHECK-NEXT: [[COND1_FR:%.*]] = freeze i1 [[COND1:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[COND4_FR]], [[COND2_FR]] +; CHECK-NEXT: [[TMP1:%.*]] = or i1 [[TMP0]], [[COND3_FR]] +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[COND1_FR]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_EXIT_SPLIT:%.*]], label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: [[COND6_FR:%.*]] = freeze i1 [[COND6:%.*]] ; CHECK-NEXT: br i1 [[COND6_FR]], label [[LOOP_EXIT_SPLIT1:%.*]], label [[ENTRY_SPLIT_SPLIT:%.*]] diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll index c696238..4bb5fc2 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll @@ -591,11 +591,14 @@ define i32 @test_partial_condition_unswitch_or(i32* %var, i1 %cond1, i1 %cond2, entry: br label %loop_begin ; CHECK-NEXT: entry: -; CHECK-NEXT: %[[INV_OR1:.*]] = or i1 %cond4, %cond2 -; CHECK-NEXT: %[[INV_OR2:.*]] = or i1 %[[INV_OR1]], %cond3 -; CHECK-NEXT: %[[INV_OR3:.*]] = or i1 %[[INV_OR2]], %cond1 -; CHECK-NEXT: [[FROZEN:%.+]] = freeze i1 %[[INV_OR3]] -; CHECK-NEXT: br i1 [[FROZEN]], label %loop_exit.split, label %entry.split +; CHECK-NEXT: %[[C4_FR:.+]] = freeze i1 %cond4 +; CHECK-NEXT: %[[C2_FR:.+]] = freeze i1 %cond2 +; CHECK-NEXT: %[[C3_FR:.+]] = freeze i1 %cond3 +; CHECK-NEXT: %[[C1_FR:.+]] = freeze i1 %cond1 +; CHECK-NEXT: %[[INV_OR1:.*]] = or i1 %[[C4_FR]], %[[C2_FR]] +; CHECK-NEXT: %[[INV_OR2:.*]] = or i1 %[[INV_OR1]], %[[C3_FR]] +; CHECK-NEXT: %[[INV_OR3:.*]] = or i1 %[[INV_OR2]], %[[C1_FR]] +; CHECK-NEXT: br i1 %[[INV_OR3]], label %loop_exit.split, label %entry.split ; ; CHECK: entry.split: ; CHECK-NEXT: br label %loop_begin diff --git a/llvm/test/tools/llvm-ifs/exclude.test b/llvm/test/tools/llvm-ifs/exclude.test index 29f9ab8..7878948 100644 --- a/llvm/test/tools/llvm-ifs/exclude.test +++ b/llvm/test/tools/llvm-ifs/exclude.test @@ -2,6 +2,9 @@ # RUN: llvm-ifs --input-format=IFS --output-ifs=- --exclude='exclude*' %s | FileCheck %s +# Check that exclude excludes from elf files too. +# RUN: llvm-ifs %s --output-elf - --exclude='exclude*' | llvm-ifs - --output-ifs - | FileCheck %s + # RUN: llvm-ifs --input-format=IFS --output-ifs=- --exclude='exclude*' \ # RUN: --strip-undefined %s | FileCheck %s --check-prefix=BOTH @@ -13,6 +16,7 @@ --- !ifs-v1 SoName: somelib.so IfsVersion: 3.0 +Target: x86_64-unknown-linux-gnu Symbols: - { Name: dont_exclude, Type: Func, Undefined: true } - { Name: exclude_1, Type: Func } diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s index 5c0fc8e..0662794 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s @@ -1269,7 +1269,7 @@ vzeroupper # CHECK-NEXT: 1 2 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2 # CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse3.s index df495ed..4c5908e 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse3.s @@ -58,7 +58,7 @@ mwait # CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movshdup %xmm0, %xmm2 # CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movsldup %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s index 005c2ee..38170f8 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s @@ -1269,7 +1269,7 @@ vzeroupper # CHECK-NEXT: 1 2 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2 # CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s index 61cf0f1..e6bec19 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s @@ -58,7 +58,7 @@ mwait # CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movshdup %xmm0, %xmm2 # CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movsldup %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-reduce/mir/drop-ir-references.mir b/llvm/test/tools/llvm-reduce/mir/drop-ir-references.mir new file mode 100644 index 0000000..88d9566 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/mir/drop-ir-references.mir @@ -0,0 +1,86 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce -simplify-mir -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log +# RUN: FileCheck --check-prefix=RESULT %s < %t + +# CHECK-INTERESTINGNESS: G_LOAD +# CHECK-INTERESTINGNESS: G_LOAD +# CHECK-INTERESTINGNESS: G_LOAD +# CHECK-INTERESTINGNESS: G_LOAD +# CHECK-INTERESTINGNESS: G_STORE +# CHECK-INTERESTINGNESS: G_STORE +# CHECK-INTERESTINGNESS: G_STORE %{{[0-9]+}}(s32), %{{[0-9]+}}(p5) :: (store (s32) into %ir.keep.store, addrspace 5) + +# RESULT: name: func +# RESULT: stack: +# RESULT-NEXT: - { id: 0, size: 32, alignment: 8 } + +# RESULT: body: +# RESULT-NEXT: bb.0: +# RESULT: %{{[0-9]+}}:_(<2 x s16>) = G_LOAD %{{[0-9]+}}(p1) :: (load (<2 x s16>), align 32, addrspace 1) + +# RESULT: bb.1: +# RESULT-NEXT: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}(p1) :: (load (<2 x s32>), addrspace 3) +# RESULT-NEXT: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}(p1) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) +# RESULT-NEXT: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}(p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, basealign 8, addrspace 3) + + +# RESULT: bb.2: +# RESULT-NEXT: G_STORE %{{[0-9]+}}(<2 x s32>), %{{[0-9]+}}(p5) :: (store (<2 x s32>) into %fixed-stack.0, addrspace 5) +# RESULT-NEXT: G_STORE %{{[0-9]+}}(<2 x s32>), %{{[0-9]+}}(p5) :: (store (<2 x s32>) into %stack.0, addrspace 5) + +# RESULT: bb.3: +# RESULT-NEXT: G_STORE %{{[0-9]+}}(s32), %{{[0-9]+}}(p5) :: (store (s32) into %ir.keep.store, addrspace 5) +# RESULT-NEXT: S_ENDPGM + +--- | + define void @func(<2 x i16> addrspace(1)* %argptr0, <2 x i32> addrspace(3)* %argptr1, i32 addrspace(5)* %keep.store) { + entry: + %alloca = alloca i32, addrspace(5) + br label %block.name.0 + + block.name.0: + br label %block.name.1 + + block.name.1: + br label %exit + + exit: + ret void + } + +... +--- +name: func +tracksRegLiveness: true +fixedStack: + - { id: 0, offset: 16, size: 8, alignment: 8 } +stack: + - { id: 0, size: 32, alignment: 8, name: alloca } +body: | + bb.0.entry: + S_WAITCNT 0 + S_NOP 0 + %0:_(p1) = G_IMPLICIT_DEF + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>) from %ir.argptr0, align 32, addrspace 1) + %2:_(<2 x s32>) = G_ZEXT %1 + + bb.1.block.name.0: + %3:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>) from %ir.argptr1, addrspace 3) + %4:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>) from %ir.argptr1 + 8, addrspace 3) + %5:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>) from %ir.argptr1 + 12, addrspace 3) + + bb.2.block.name.0: + %6:_(<2 x s32>) = G_ADD %2, %3 + %7:_(<2 x s32>) = G_ADD %6, %4 + %8:_(<2 x s32>) = G_ADD %7, %5 + %9:_(p5) = G_IMPLICIT_DEF + G_STORE %8, %9 :: (store (<2 x s32>) into %fixed-stack.0, addrspace 5) + G_STORE %8, %9 :: (store (<2 x s32>) into %stack.0.alloca, addrspace 5) + + bb.3.exit: + %10:_(p5) = G_IMPLICIT_DEF + %11:_(s32) = G_IMPLICIT_DEF + G_STORE %11, %10 :: (store (s32) into %ir.keep.store, addrspace 5) + S_ENDPGM 0 +... + diff --git a/llvm/test/tools/llvm-reduce/mir/generic-vreg.mir b/llvm/test/tools/llvm-reduce/mir/generic-vreg.mir index edb2cf9..b1099a2 100644 --- a/llvm/test/tools/llvm-reduce/mir/generic-vreg.mir +++ b/llvm/test/tools/llvm-reduce/mir/generic-vreg.mir @@ -12,13 +12,17 @@ # CHECK-INTERESTINGNESS: G_IMPLICIT_DEF # CHECK-INTERESTINGNESS: G_STORE -# RESULT: %v0:vgpr(s32) = COPY $vgpr0, implicit-def %9(<2 x s16>), implicit-def %10(s64), implicit-def %11(s64), implicit-def %12(<2 x s32>) -# RESULT-NEXT: %unused_load_ptr:sgpr(p1) = G_IMPLICIT_DEF -# RESULT-NEXT: %aoeu:_(s64) = G_BITCAST %12(<2 x s32>) +# RESULT: %{{[0-9]+}}:vgpr(s32) = G_IMPLICIT_DEF +# RESULT-NEXT: %{{[0-9]+}}:vgpr(<2 x s16>) = G_IMPLICIT_DEF +# RESULT-NEXT: %{{[0-9]+}}:sgpr(p1) = G_IMPLICIT_DEF +# RESULT-NEXT: %{{[0-9]+}}:_(s64) = G_IMPLICIT_DEF +# RESULT-NEXT: %{{[0-9]+}}:vreg_64(s64) = IMPLICIT_DEF +# RESULT-NEXT: %{{[0-9]+}}:_(<2 x s32>) = G_IMPLICIT_DEF +# RESULT-NEXT: %aoeu:_(s64) = G_BITCAST %14(<2 x s32>) # RESULT-NEXT: %add:_(s64) = G_ADD %aoeu, %aoeu # RESULT-NEXT: %ptr:_(p1) = G_IMPLICIT_DEF -# RESULT-NEXT: G_STORE %v0(s32), %ptr(p1) :: (store (s32), addrspace 1) -# RESULT-NEXT: S_ENDPGM 0, implicit %add(s64), implicit %9(<2 x s16>), implicit %11(s64) +# RESULT-NEXT: G_STORE %{{[0-9]+}}(s32), %ptr(p1) :: (store (s32), addrspace 1) +# RESULT-NEXT: S_ENDPGM 0, implicit %add(s64), implicit %{{[0-9]+}}(<2 x s16>), implicit %{{[0-9]+}}(s64) --- name: f diff --git a/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir b/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir index f252ff3..e6e03b4 100644 --- a/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir +++ b/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir @@ -8,8 +8,8 @@ # pattern in the output and that combined with that the MIR has to be valid # (pass verify) results in the given sequence. -# CHECK: %0:gpr = COPY $x10 -# CHECK-NEXT: %2:gpr = ADDI %0, 5 +# CHECK: [[IMPDEF:%[0-9]+]]:gpr = IMPLICIT_DEF +# CHECK-NEXT: %{{[0-9]+}}:gpr = ADDI [[IMPDEF]], 5 # CHECK-NEXT: PseudoRET implicit $x10 ... diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir b/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir index f20ebd5..098adb75 100644 --- a/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir +++ b/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir @@ -1,5 +1,5 @@ # REQUIRES: amdgpu-registered-target -# RUN: llvm-reduce -simplify-mir -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log +# RUN: llvm-reduce -simplify-mir --delta-passes=instructions -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log # RUN: FileCheck --match-full-lines --check-prefix=RESULT %s < %t # CHECK-INTERESTINGNESS: V_MOV_B32 diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-frame-info.mir b/llvm/test/tools/llvm-reduce/mir/preserve-frame-info.mir index 8046e29..1a89194 100644 --- a/llvm/test/tools/llvm-reduce/mir/preserve-frame-info.mir +++ b/llvm/test/tools/llvm-reduce/mir/preserve-frame-info.mir @@ -1,5 +1,5 @@ # REQUIRES: amdgpu-registered-target -# RUN: llvm-reduce -simplify-mir -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log +# RUN: llvm-reduce -simplify-mir -mtriple=amdgcn-amd-amdhsa --delta-passes=instructions --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log # RUN: FileCheck --match-full-lines --check-prefix=RESULT %s < %t # CHECK-INTERESTINGNESS-COUNT-15: V_MOV_B32 @@ -46,7 +46,7 @@ # RESULT-NEXT: - { id: 9, name: guard, offset: 128, size: 4, alignment: 4 } -# RESULT: S_NOP 0 +# RESULT: bb.0: # RESULT-NEXT: [[FI0:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec # RESULT-NEXT: [[FI1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0.bigalloca, implicit $exec # RESULT-NEXT: [[FI2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.2, implicit $exec diff --git a/llvm/test/tools/llvm-reduce/mir/remove-frame-destroy.mir b/llvm/test/tools/llvm-reduce/mir/remove-frame-destroy.mir new file mode 100644 index 0000000..7a7edcb --- /dev/null +++ b/llvm/test/tools/llvm-reduce/mir/remove-frame-destroy.mir @@ -0,0 +1,24 @@ +# REQUIRES: amdgpu-registered-target +# RUN: llvm-reduce --delta-passes=instructions -mtriple=amdgcn-amd-amdhsa --test FileCheck --test-arg --check-prefix=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t 2> %t.log +# RUN: FileCheck --check-prefix=RESULT %s < %t + +# CHECK-INTERESTINGNESS: S_NOP 0 + +# RESULT: ADJCALLSTACKUP +# RESULT-NEXT: ADJCALLSTACKDOWN +# RESULT-NEXT: S_ENDPGM 0 + +--- +name: frame_setup_destroy +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + S_NOP 0 + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + S_NOP 0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + S_ENDPGM 0 + +... diff --git a/llvm/test/tools/llvm-reduce/mir/subreg-def0.mir b/llvm/test/tools/llvm-reduce/mir/subreg-def0.mir index e37ab45..7a44956 100644 --- a/llvm/test/tools/llvm-reduce/mir/subreg-def0.mir +++ b/llvm/test/tools/llvm-reduce/mir/subreg-def0.mir @@ -4,7 +4,8 @@ # CHECK-INTERESTINGNESS: V_ADD_U32 -# RESULT: S_WAITCNT 0, implicit-def undef %2.sub1, implicit-def %3.sub0 +# RESULT: undef %2.sub1:vreg_64 = IMPLICIT_DEF +# RESULT-NEXT: %3.sub0:vreg_64 = IMPLICIT_DEF # RESULT-NEXT: %1:vgpr_32 = V_ADD_U32_e32 %2.sub0, %2.sub1, implicit $exec # RESULT-NEXT: S_ENDPGM 0, implicit %1 diff --git a/llvm/test/tools/llvm-reduce/mir/subreg-def1.mir b/llvm/test/tools/llvm-reduce/mir/subreg-def1.mir index 8be7f64..067fbf4 100644 --- a/llvm/test/tools/llvm-reduce/mir/subreg-def1.mir +++ b/llvm/test/tools/llvm-reduce/mir/subreg-def1.mir @@ -5,11 +5,10 @@ # CHECK-INTERESTINGNESS: %{{[0-9]+}}.sub0:vreg_64 = V_ADD_U32_e32 %{{[0-9]+}}.sub1, %{{[0-9]+}}.sub0, implicit $exec # CHECK-INTERESTINGNESS: %{{[0-9]+}}.sub0:vreg_64 = V_ADD_U32_e32 4, %{{[0-9]+}}.sub0, implicit $exec -# RESULT: S_WAITCNT 0, implicit-def undef %2.sub1 -# RESULT-NEXT: %{{[0-9]+}}.sub0:vreg_64 = V_ADD_U32_e32 %{{[0-9]+}}.sub1, %{{[0-9]+}}.sub0, implicit $exec -# RESULT-NEXT: %{{[0-9]+}}.sub0:vreg_64 = V_ADD_U32_e32 4, %{{[0-9]+}}.sub0, implicit $exec -# RESULT-NEXT: S_ENDPGM 0, implicit %{{[0-9]+}}, implicit %{{[0-9]+}}.sub0 - +# RESULT: undef %2.sub1:vreg_64 = IMPLICIT_DEF +# RESULT-NEXT: %0.sub0:vreg_64 = V_ADD_U32_e32 %2.sub1, %2.sub0, implicit $exec +# RESULT-NEXT: %1.sub0:vreg_64 = V_ADD_U32_e32 4, %2.sub0, implicit $exec +# RESULT-NEXT: S_ENDPGM 0, implicit %2, implicit %2.sub0 --- name: f tracksRegLiveness: true |