diff options
145 files changed, 1290 insertions, 1271 deletions
diff --git a/clang/test/CodeGen/AArch64/sve-vector-bitwise-ops.c b/clang/test/CodeGen/AArch64/sve-vector-bitwise-ops.c index 5b97ce4..291d72d 100644 --- a/clang/test/CodeGen/AArch64/sve-vector-bitwise-ops.c +++ b/clang/test/CodeGen/AArch64/sve-vector-bitwise-ops.c @@ -260,8 +260,8 @@ svuint64_t xor_u64(svuint64_t a, svuint64_t b) { // CHECK-LABEL: @neg_bool( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NEG:%.*]] = xor <vscale x 16 x i1> [[A:%.*]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer) -// CHECK-NEXT: ret <vscale x 16 x i1> [[NEG]] +// CHECK-NEXT: [[NOT:%.*]] = xor <vscale x 16 x i1> [[A:%.*]], splat (i1 true) +// CHECK-NEXT: ret <vscale x 16 x i1> [[NOT]] // svbool_t neg_bool(svbool_t a) { return ~a; @@ -269,8 +269,8 @@ svbool_t neg_bool(svbool_t a) { // CHECK-LABEL: @neg_i8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NEG:%.*]] = xor <vscale x 16 x i8> [[A:%.*]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer) -// CHECK-NEXT: ret <vscale x 16 x i8> [[NEG]] +// CHECK-NEXT: [[NOT:%.*]] = xor <vscale x 16 x i8> [[A:%.*]], splat (i8 -1) +// CHECK-NEXT: ret <vscale x 16 x i8> [[NOT]] // svint8_t neg_i8(svint8_t a) { return ~a; @@ -278,8 +278,8 @@ svint8_t neg_i8(svint8_t a) { // CHECK-LABEL: @neg_i16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NEG:%.*]] = xor <vscale x 8 x i16> [[A:%.*]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) -// CHECK-NEXT: ret <vscale x 8 x i16> [[NEG]] +// CHECK-NEXT: [[NOT:%.*]] = xor <vscale x 8 x i16> [[A:%.*]], splat (i16 -1) +// CHECK-NEXT: ret <vscale x 8 x i16> [[NOT]] // svint16_t neg_i16(svint16_t a) { return ~a; @@ -287,8 +287,8 @@ svint16_t neg_i16(svint16_t a) { // CHECK-LABEL: @neg_i32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NEG:%.*]] = xor <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -// CHECK-NEXT: ret <vscale x 4 x i32> [[NEG]] +// CHECK-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i32> [[A:%.*]], splat (i32 -1) +// CHECK-NEXT: ret <vscale x 4 x i32> [[NOT]] // svint32_t neg_i32(svint32_t a) { return ~a; @@ -296,8 +296,8 @@ svint32_t neg_i32(svint32_t a) { // CHECK-LABEL: @neg_i64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NEG:%.*]] = xor <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -// CHECK-NEXT: ret <vscale x 2 x i64> [[NEG]] +// CHECK-NEXT: [[NOT:%.*]] = xor <vscale x 2 x i64> [[A:%.*]], splat (i64 -1) +// CHECK-NEXT: ret <vscale x 2 x i64> [[NOT]] // svint64_t neg_i64(svint64_t a) { return ~a; @@ -305,8 +305,8 @@ svint64_t neg_i64(svint64_t a) { // CHECK-LABEL: @neg_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NEG:%.*]] = xor <vscale x 16 x i8> [[A:%.*]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer) -// CHECK-NEXT: ret <vscale x 16 x i8> [[NEG]] +// CHECK-NEXT: [[NOT:%.*]] = xor <vscale x 16 x i8> [[A:%.*]], splat (i8 -1) +// CHECK-NEXT: ret <vscale x 16 x i8> [[NOT]] // svuint8_t neg_u8(svuint8_t a) { return ~a; @@ -314,8 +314,8 @@ svuint8_t neg_u8(svuint8_t a) { // CHECK-LABEL: @neg_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NEG:%.*]] = xor <vscale x 8 x i16> [[A:%.*]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) -// CHECK-NEXT: ret <vscale x 8 x i16> [[NEG]] +// CHECK-NEXT: [[NOT:%.*]] = xor <vscale x 8 x i16> [[A:%.*]], splat (i16 -1) +// CHECK-NEXT: ret <vscale x 8 x i16> [[NOT]] // svuint16_t neg_u16(svuint16_t a) { return ~a; @@ -323,8 +323,8 @@ svuint16_t neg_u16(svuint16_t a) { // CHECK-LABEL: @neg_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NEG:%.*]] = xor <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -// CHECK-NEXT: ret <vscale x 4 x i32> [[NEG]] +// CHECK-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i32> [[A:%.*]], splat (i32 -1) +// CHECK-NEXT: ret <vscale x 4 x i32> [[NOT]] // svuint32_t neg_u32(svuint32_t a) { return ~a; @@ -332,8 +332,8 @@ svuint32_t neg_u32(svuint32_t a) { // CHECK-LABEL: @neg_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NEG:%.*]] = xor <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -// CHECK-NEXT: ret <vscale x 2 x i64> [[NEG]] +// CHECK-NEXT: [[NOT:%.*]] = xor <vscale x 2 x i64> [[A:%.*]], splat (i64 -1) +// CHECK-NEXT: ret <vscale x 2 x i64> [[NOT]] // svuint64_t neg_u64(svuint64_t a) { return ~a; diff --git a/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp b/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp index 7e7eff3..d6fa26b 100644 --- a/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp +++ b/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp @@ -164,10 +164,10 @@ svint32_t cond_i32_splat(svint32_t a) { // CHECK-LABEL: @_Z14cond_u32_splatu12__SVUint32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[A:%.*]], splat (i32 1) // CHECK-NEXT: [[CONV:%.*]] = zext <vscale x 4 x i1> [[CMP]] to <vscale x 4 x i32> // CHECK-NEXT: [[VECTOR_COND:%.*]] = icmp ne <vscale x 4 x i32> [[CONV]], zeroinitializer -// CHECK-NEXT: [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +// CHECK-NEXT: [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> splat (i32 1) // CHECK-NEXT: ret <vscale x 4 x i32> [[VECTOR_SELECT]] // svuint32_t cond_u32_splat(svuint32_t a) { @@ -188,10 +188,10 @@ svint64_t cond_i64_splat(svint64_t a) { // CHECK-LABEL: @_Z14cond_u64_splatu12__SVUint64_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[CMP:%.*]] = icmp ult <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <vscale x 2 x i64> [[A:%.*]], splat (i64 1) // CHECK-NEXT: [[CONV:%.*]] = zext <vscale x 2 x i1> [[CMP]] to <vscale x 2 x i64> // CHECK-NEXT: [[VECTOR_COND:%.*]] = icmp ne <vscale x 2 x i64> [[CONV]], zeroinitializer -// CHECK-NEXT: [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +// CHECK-NEXT: [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> splat (i64 1) // CHECK-NEXT: ret <vscale x 2 x i64> [[VECTOR_SELECT]] // svuint64_t cond_u64_splat(svuint64_t a) { diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index f6a5bc1..f818377 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1741,6 +1741,24 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Use the same shorthand for splat vector (i.e. "splat(Ty val)") as is + // permitted on IR input to reduce the output changes when enabling + // UseConstant{Int,FP}ForScalableSplat. + // TODO: Remove this block when the UseConstant{Int,FP}ForScalableSplat + // options are removed. + if (CE->getOpcode() == Instruction::ShuffleVector) { + if (auto *SplatVal = CE->getSplatValue()) { + if (isa<ConstantInt>(SplatVal) || isa<ConstantFP>(SplatVal)) { + Out << "splat ("; + WriterCtx.TypePrinter->print(SplatVal->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, SplatVal, WriterCtx); + Out << ')'; + return; + } + } + } + Out << CE->getOpcodeName(); WriteOptimizationInfo(Out, CE); Out << " ("; diff --git a/llvm/test/Assembler/constant-splat.ll b/llvm/test/Assembler/constant-splat.ll index 1c28310..82e25ad 100644 --- a/llvm/test/Assembler/constant-splat.ll +++ b/llvm/test/Assembler/constant-splat.ll @@ -51,13 +51,13 @@ define <4 x i32> @ret_fixed_lenth_vector_splat_i32() { } define void @add_fixed_lenth_vector_splat_double(<vscale x 2 x double> %a) { -; CHECK: %add = fadd <vscale x 2 x double> %a, shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 5.700000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK: %add = fadd <vscale x 2 x double> %a, splat (double 5.700000e+00) %add = fadd <vscale x 2 x double> %a, splat (double 5.700000e+00) ret void } define <vscale x 4 x i32> @ret_scalable_vector_splat_i32() { -; CHECK: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 78, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK: ret <vscale x 4 x i32> splat (i32 78) ret <vscale x 4 x i32> splat (i32 78) } diff --git a/llvm/test/Bitcode/vscale-shuffle.ll b/llvm/test/Bitcode/vscale-shuffle.ll index 3f36209..f927949 100644 --- a/llvm/test/Bitcode/vscale-shuffle.ll +++ b/llvm/test/Bitcode/vscale-shuffle.ll @@ -2,8 +2,8 @@ ; RUN: verify-uselistorder < %s define void @f() { - %l = call <vscale x 16 x i8> @l(<vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer)) - %i = add <vscale x 2 x i64> undef, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer) + %l = call <vscale x 16 x i8> @l(<vscale x 16 x i1> splat (i1 true)) + %i = add <vscale x 2 x i64> undef, splat (i64 1) unreachable } diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll index 26fb4ca..71c6380 100644 --- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll @@ -14,9 +14,9 @@ declare <4 x float> @llvm.cos.v4f32(<4 x float>) declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>) declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>) + ;. ; CHECK: @llvm.compiler.used = appending global [68 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vatan2q_f64, ptr @armpl_vatan2q_f32, ptr @armpl_svatan2_f64_x, ptr @armpl_svatan2_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata" - ;. define <2 x double> @llvm_cos_f64(<2 x double> %in) { ; CHECK-LABEL: define <2 x double> @llvm_cos_f64 @@ -41,7 +41,7 @@ define <4 x float> @llvm_cos_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_cos_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in) @@ -51,7 +51,7 @@ define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_cos_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in) @@ -86,7 +86,7 @@ define <4 x float> @llvm_exp_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in) @@ -96,7 +96,7 @@ define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in) @@ -131,7 +131,7 @@ define <4 x float> @llvm_exp10_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in) @@ -141,7 +141,7 @@ define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) # define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in) @@ -176,7 +176,7 @@ define <4 x float> @llvm_exp2_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp2_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in) @@ -186,7 +186,7 @@ define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp2_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in) @@ -221,7 +221,7 @@ define <4 x float> @llvm_log_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_log_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in) @@ -231,7 +231,7 @@ define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_log_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in) @@ -266,7 +266,7 @@ define <4 x float> @llvm_log10_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_log10_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in) @@ -276,7 +276,7 @@ define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) # define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_log10_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in) @@ -311,7 +311,7 @@ define <4 x float> @llvm_log2_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_log2_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in) @@ -321,7 +321,7 @@ define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_log2_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in) @@ -356,7 +356,7 @@ define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) { define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_pow_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POWER:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power) @@ -366,7 +366,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_pow_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POWER:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power) @@ -401,7 +401,7 @@ define <4 x float> @llvm_sin_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_sin_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in) @@ -411,7 +411,7 @@ define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_sin_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in) @@ -446,7 +446,7 @@ define <4 x float> @llvm_tan_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_tan_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_tan_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.tan.nxv2f64(<vscale x 2 x double> %in) @@ -456,7 +456,7 @@ define <vscale x 2 x double> @llvm_tan_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_tan_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_tan_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float> %in) @@ -491,7 +491,7 @@ define <4 x float> @llvm_acos_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_acos_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_acos_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svacos_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svacos_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.acos.nxv2f64(<vscale x 2 x double> %in) @@ -501,7 +501,7 @@ define <vscale x 2 x double> @llvm_acos_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_acos_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_acos_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svacos_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svacos_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.acos.nxv4f32(<vscale x 4 x float> %in) @@ -536,7 +536,7 @@ define <4 x float> @llvm_asin_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_asin_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_asin_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svasin_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svasin_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.asin.nxv2f64(<vscale x 2 x double> %in) @@ -546,7 +546,7 @@ define <vscale x 2 x double> @llvm_asin_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_asin_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_asin_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svasin_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svasin_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.asin.nxv4f32(<vscale x 4 x float> %in) @@ -581,7 +581,7 @@ define <4 x float> @llvm_atan_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_atan_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_atan_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svatan_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svatan_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.atan.nxv2f64(<vscale x 2 x double> %in) @@ -591,7 +591,7 @@ define <vscale x 2 x double> @llvm_atan_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_atan_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_atan_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svatan_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svatan_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.atan.nxv4f32(<vscale x 4 x float> %in) @@ -626,7 +626,7 @@ define <4 x float> @llvm_atan2_f32(<4 x float> %in1, <4 x float> %in2) { define <vscale x 2 x double> @llvm_atan2_vscale_f64(<vscale x 2 x double> %in1, <vscale x 2 x double> %in2) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_atan2_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN1:%.*]], <vscale x 2 x double> [[IN2:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[IN1]], <vscale x 2 x double> [[IN2]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[IN1]], <vscale x 2 x double> [[IN2]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.atan2.nxv2f64(<vscale x 2 x double> %in1, <vscale x 2 x double> %in2) @@ -636,7 +636,7 @@ define <vscale x 2 x double> @llvm_atan2_vscale_f64(<vscale x 2 x double> %in1, define <vscale x 4 x float> @llvm_atan2_vscale_f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_atan2_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN1:%.*]], <vscale x 4 x float> [[IN2:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[IN1]], <vscale x 4 x float> [[IN2]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[IN1]], <vscale x 4 x float> [[IN2]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2) @@ -671,7 +671,7 @@ define <4 x float> @llvm_cosh_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_cosh_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_cosh_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svcosh_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svcosh_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.cosh.nxv2f64(<vscale x 2 x double> %in) @@ -681,7 +681,7 @@ define <vscale x 2 x double> @llvm_cosh_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_cosh_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_cosh_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svcosh_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svcosh_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.cosh.nxv4f32(<vscale x 4 x float> %in) @@ -716,7 +716,7 @@ define <4 x float> @llvm_sinh_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_sinh_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_sinh_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.sinh.nxv2f64(<vscale x 2 x double> %in) @@ -726,7 +726,7 @@ define <vscale x 2 x double> @llvm_sinh_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_sinh_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_sinh_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.sinh.nxv4f32(<vscale x 4 x float> %in) @@ -761,7 +761,7 @@ define <4 x float> @llvm_tanh_f32(<4 x float> %in) { define <vscale x 2 x double> @llvm_tanh_vscale_f64(<vscale x 2 x double> %in) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @llvm_tanh_vscale_f64 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.tanh.nxv2f64(<vscale x 2 x double> %in) @@ -771,7 +771,7 @@ define <vscale x 2 x double> @llvm_tanh_vscale_f64(<vscale x 2 x double> %in) #0 define <vscale x 4 x float> @llvm_tanh_vscale_f32(<vscale x 4 x float> %in) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @llvm_tanh_vscale_f32 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.tanh.nxv4f32(<vscale x 4 x float> %in) diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll index 07edb46..1d429ec 100644 --- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll @@ -44,7 +44,7 @@ define <vscale x 4 x float> @llvm_copysign_vscale_f32(<vscale x 4 x float> %mag, define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_cos_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in) @@ -53,7 +53,7 @@ define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_cos_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in) @@ -62,7 +62,7 @@ define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_exp_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in) @@ -71,7 +71,7 @@ define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_exp_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in) @@ -80,7 +80,7 @@ define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_exp10_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in) @@ -89,7 +89,7 @@ define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_exp10_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in) @@ -98,7 +98,7 @@ define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_exp2_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in) @@ -107,7 +107,7 @@ define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_exp2_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in) @@ -170,7 +170,7 @@ define <vscale x 4 x float> @llvm_fma_vscale_f32(<vscale x 4 x float> %a, <vscal define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_log_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in) @@ -179,7 +179,7 @@ define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_log_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in) @@ -188,7 +188,7 @@ define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_log10_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in) @@ -197,7 +197,7 @@ define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_log10_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in) @@ -206,7 +206,7 @@ define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_log2_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in) @@ -215,7 +215,7 @@ define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_log2_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in) @@ -278,7 +278,7 @@ define <vscale x 4 x float> @llvm_nearbyint_vscale_f32(<vscale x 4 x float> %in) define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow) { ; CHECK-LABEL: @llvm_pow_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow) @@ -287,7 +287,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow) { ; CHECK-LABEL: @llvm_pow_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow) @@ -332,7 +332,7 @@ define <vscale x 4 x float> @llvm_round_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_sin_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in) @@ -341,7 +341,7 @@ define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_sin_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in) @@ -368,7 +368,7 @@ define <vscale x 4 x float> @llvm_sqrt_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_tan_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_tan_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.tan.nxv2f64(<vscale x 2 x double> %in) @@ -377,7 +377,7 @@ define <vscale x 2 x double> @llvm_tan_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_tan_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_tan_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float> %in) @@ -386,7 +386,7 @@ define <vscale x 4 x float> @llvm_tan_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_acos_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_acos_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.acos.nxv2f64(<vscale x 2 x double> %in) @@ -395,7 +395,7 @@ define <vscale x 2 x double> @llvm_acos_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_acos_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_acos_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.acos.nxv4f32(<vscale x 4 x float> %in) @@ -404,7 +404,7 @@ define <vscale x 4 x float> @llvm_acos_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_asin_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_asin_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.asin.nxv2f64(<vscale x 2 x double> %in) @@ -413,7 +413,7 @@ define <vscale x 2 x double> @llvm_asin_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_asin_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_asin_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.asin.nxv4f32(<vscale x 4 x float> %in) @@ -422,7 +422,7 @@ define <vscale x 4 x float> @llvm_asin_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_atan_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_atan_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.atan.nxv2f64(<vscale x 2 x double> %in) @@ -431,7 +431,7 @@ define <vscale x 2 x double> @llvm_atan_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_atan_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_atan_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.atan.nxv4f32(<vscale x 4 x float> %in) @@ -440,7 +440,7 @@ define <vscale x 4 x float> @llvm_atan_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_atan2_vscale_f64(<vscale x 2 x double> %x, <vscale x 2 x double> %y) { ; CHECK-LABEL: @llvm_atan2_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[INX:%.*]], <vscale x 2 x double> [[INY:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.atan2.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %y) @@ -449,7 +449,7 @@ define <vscale x 2 x double> @llvm_atan2_vscale_f64(<vscale x 2 x double> %x, <v define <vscale x 4 x float> @llvm_atan2_vscale_f32(<vscale x 4 x float> %x, <vscale x 4 x float> %y) { ; CHECK-LABEL: @llvm_atan2_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[INX:%.*]], <vscale x 4 x float> [[INY:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x float> %y) @@ -458,7 +458,7 @@ define <vscale x 4 x float> @llvm_atan2_vscale_f32(<vscale x 4 x float> %x, <vsc define <vscale x 2 x double> @llvm_cosh_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_cosh_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.cosh.nxv2f64(<vscale x 2 x double> %in) @@ -467,7 +467,7 @@ define <vscale x 2 x double> @llvm_cosh_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_cosh_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_cosh_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.cosh.nxv4f32(<vscale x 4 x float> %in) @@ -476,7 +476,7 @@ define <vscale x 4 x float> @llvm_cosh_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_sinh_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_sinh_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.sinh.nxv2f64(<vscale x 2 x double> %in) @@ -485,7 +485,7 @@ define <vscale x 2 x double> @llvm_sinh_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_sinh_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_sinh_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.sinh.nxv4f32(<vscale x 4 x float> %in) @@ -494,7 +494,7 @@ define <vscale x 4 x float> @llvm_sinh_vscale_f32(<vscale x 4 x float> %in) { define <vscale x 2 x double> @llvm_tanh_vscale_f64(<vscale x 2 x double> %in) { ; CHECK-LABEL: @llvm_tanh_vscale_f64( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call fast <vscale x 2 x double> @llvm.tanh.nxv2f64(<vscale x 2 x double> %in) @@ -503,7 +503,7 @@ define <vscale x 2 x double> @llvm_tanh_vscale_f64(<vscale x 2 x double> %in) { define <vscale x 4 x float> @llvm_tanh_vscale_f32(<vscale x 4 x float> %in) { ; CHECK-LABEL: @llvm_tanh_vscale_f32( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call fast <vscale x 4 x float> @llvm.tanh.nxv4f32(<vscale x 4 x float> %in) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll index 7565e0a..b1ece9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll @@ -17,8 +17,8 @@ define <vscale x 1 x i64> @gather(ptr %a, i32 %len) { ; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 16, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]]) -; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]]) +; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP2]]) ; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]] ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]] @@ -62,8 +62,8 @@ define <vscale x 1 x i64> @gather_disjoint_or(ptr %a, i64 %len) { ; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP1]]) -; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]]) +; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]]) ; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[VSCALE]] ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 2 @@ -116,7 +116,7 @@ define void @scatter(ptr %a, i32 %len) { ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP1]], i64 16, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]]) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP1]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]] ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]] @@ -150,8 +150,8 @@ define <vscale x 1 x i64> @gather_loopless(ptr %p, i64 %stride) { ; CHECK-LABEL: @gather_loopless( ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]]) -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]]) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]]) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() @@ -172,8 +172,8 @@ define <vscale x 1 x i64> @straightline_offset_add(ptr %p, i64 %offset) { ; CHECK-LABEL: @straightline_offset_add( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 4, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]]) -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 4, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]]) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]]) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() @@ -194,8 +194,8 @@ define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i64 %offset) ; CHECK-LABEL: @straightline_offset_disjoint_or( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]]) -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]]) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]]) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() @@ -214,8 +214,8 @@ define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i64 %offset) define <vscale x 1 x i64> @straightline_offset_shl(ptr %p) { ; CHECK-LABEL: @straightline_offset_shl( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 32, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP1]]) -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 32, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]]) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]]) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() @@ -237,7 +237,7 @@ define <vscale x 1 x i64> @neg_shl_is_not_commutative(ptr %p) { ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[SPLAT_INSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer ; CHECK-NEXT: [[OFFSET:%.*]] = shl <vscale x 1 x i64> [[SPLAT]], [[STEP]] ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 1 x i64> [[OFFSET]] -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() @@ -259,8 +259,8 @@ define <vscale x 1 x i64> @straightline_offset_shl_nonc(ptr %p, i64 %shift) { ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 1, [[SHIFT:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP2]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP3]]) -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP4]], <vscale x 1 x i64> poison, i32 [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP2]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]]) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP4]], <vscale x 1 x i64> poison, i32 [[TMP3]]) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() @@ -281,7 +281,7 @@ define void @scatter_loopless(<vscale x 1 x i64> %x, ptr %p, i64 %stride) { ; CHECK-LABEL: @scatter_loopless( ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> [[X:%.*]], ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]]) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> [[X:%.*]], ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]]) ; CHECK-NEXT: ret void ; %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() @@ -302,7 +302,7 @@ define void @scatter_loopless(<vscale x 1 x i64> %x, ptr %p, i64 %stride) { define void @constant_stride(<vscale x 1 x i64> %x, ptr %p, i64 %stride) { ; CHECK-LABEL: @constant_stride( ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 1 x i64> zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> zeroinitializer @@ -319,8 +319,8 @@ define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) { ; CHECK-LABEL: @vector_base_scalar_offset( ; CHECK-NEXT: [[PTRS2OFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRS2OFFSET]], i64 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP1]]) -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRS2OFFSET]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]]) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]]) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() @@ -339,8 +339,8 @@ define <vscale x 1 x i64> @splat_base_scalar_offset(ptr %p, i64 %offset) { ; CHECK-LABEL: @splat_base_scalar_offset( ; CHECK-NEXT: [[PTRSOFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRSOFFSET]], i64 0, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP1]]) -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRSOFFSET]], i64 0, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]]) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]]) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %head = insertelement <vscale x 1 x ptr> poison, ptr %p, i32 0 @@ -360,7 +360,7 @@ define <vscale x 1 x i64> @nonstrided_base_scalar_offset(ptr %p, <vscale x 1 x i ; CHECK-LABEL: @nonstrided_base_scalar_offset( ; CHECK-NEXT: [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[V:%.*]] ; CHECK-NEXT: [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], i64 [[OFFSET:%.*]] -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %v @@ -380,7 +380,7 @@ define <vscale x 1 x i64> @vector_base_vector_offset(ptr %p, <vscale x 1 x i64> ; CHECK-NEXT: [[STEP:%.*]] = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() ; CHECK-NEXT: [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[STEP]] ; CHECK-NEXT: [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], <vscale x 1 x i64> [[OFFSET:%.*]] -; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison) ; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] ; %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64() diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll index 597b3bb..a8da5a3 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll @@ -452,7 +452,7 @@ define <vscale x 4 x float> @scalable.expandload.nxv4f32(ptr align 4 %p, <vscale ; CHECK-NEXT: br label [[DOTSPLIT:%.*]] ; CHECK: .split: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP12:%.*]] ] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> splat (i1 true), i64 [[IV]] ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP12]] ; CHECK: 9: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]] @@ -490,7 +490,7 @@ define void @scalable.compressstore.nxv4f32(ptr align 4 %p, <vscale x 4 x float> ; CHECK-NEXT: br label [[DOTSPLIT:%.*]] ; CHECK: .split: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP12:%.*]] ] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> splat (i1 true), i64 [[IV]] ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP12]] ; CHECK: 9: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]] diff --git a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll index 6559596..78582cb 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll @@ -35,7 +35,7 @@ define <vscale x 8 x i16> @or-load-scalable-vector(ptr %p1) { ; CHECK-NEXT: [[L2:%.*]] = load <vscale x 8 x i8>, ptr [[P2]], align 1 ; CHECK-NEXT: [[E1:%.*]] = zext <vscale x 8 x i8> [[L1]] to <vscale x 8 x i16> ; CHECK-NEXT: [[E2:%.*]] = zext <vscale x 8 x i8> [[L2]] to <vscale x 8 x i16> -; CHECK-NEXT: [[S2:%.*]] = shl <vscale x 8 x i16> [[E2]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 8, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[S2:%.*]] = shl <vscale x 8 x i16> [[E2]], splat (i16 8) ; CHECK-NEXT: [[OR:%.*]] = or <vscale x 8 x i16> [[E1]], [[S2]] ; CHECK-NEXT: ret <vscale x 8 x i16> [[OR]] ; diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index 5afe5e9..b97454a 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -2667,7 +2667,7 @@ define <vscale x 4 x float> @scalable_splat_pnorm() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define <vscale x 4 x float> @scalable_splat_pnorm ; CHECK-SAME: () #[[ATTR3]] { -; CHECK-NEXT: ret <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x float> splat (float 1.000000e+00) ; ret <vscale x 4 x float> splat (float 1.0) } diff --git a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll index 25cd5526..d592c36 100644 --- a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll +++ b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -mtriple=arm64-darwin-unknown -S -passes=consthoist < %s | FileCheck %s --check-prefixes=CHECK,CV -; RUN: opt -mtriple=arm64-darwin-unknown -S -passes=consthoist -use-constant-int-for-fixed-length-splat -use-constant-int-for-scalable-splat < %s | FileCheck %s --check-prefixes=CHECK,CI +; RUN: opt -mtriple=arm64-darwin-unknown -S -passes=consthoist < %s | FileCheck %s +; RUN: opt -mtriple=arm64-darwin-unknown -S -passes=consthoist -use-constant-int-for-fixed-length-splat -use-constant-int-for-scalable-splat < %s | FileCheck %s define i128 @test1(i128 %a) { ; CHECK-LABEL: define i128 @test1( @@ -135,17 +135,11 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %a) { } define <vscale x 2 x i64> @sdiv_nxv2i64(<vscale x 2 x i64> %a) { -; CV-LABEL: define <vscale x 2 x i64> @sdiv_nxv2i64( -; CV-SAME: <vscale x 2 x i64> [[A:%.*]]) { -; CV-NEXT: [[TMP1:%.*]] = sdiv <vscale x 2 x i64> [[A]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4294967087, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; CV-NEXT: [[TMP2:%.*]] = add <vscale x 2 x i64> [[TMP1]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4294967087, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; CV-NEXT: ret <vscale x 2 x i64> [[TMP2]] -; -; CI-LABEL: define <vscale x 2 x i64> @sdiv_nxv2i64( -; CI-SAME: <vscale x 2 x i64> [[A:%.*]]) { -; CI-NEXT: [[TMP1:%.*]] = sdiv <vscale x 2 x i64> [[A]], splat (i64 4294967087) -; CI-NEXT: [[TMP2:%.*]] = add <vscale x 2 x i64> [[TMP1]], splat (i64 4294967087) -; CI-NEXT: ret <vscale x 2 x i64> [[TMP2]] +; CHECK-LABEL: define <vscale x 2 x i64> @sdiv_nxv2i64( +; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <vscale x 2 x i64> [[A]], splat (i64 4294967087) +; CHECK-NEXT: [[TMP2:%.*]] = add <vscale x 2 x i64> [[TMP1]], splat (i64 4294967087) +; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] ; %1 = sdiv <vscale x 2 x i64> %a, splat (i64 4294967087) %2 = add <vscale x 2 x i64> %1, splat (i64 4294967087) diff --git a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll index b21e94b..d261868 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll @@ -37,7 +37,7 @@ define <2 x i1> @test.vectorgep.ult.false(<2 x ptr> %vec) { define <vscale x 2 x i1> @test.scalable.vectorgep.ult.true(<vscale x 2 x ptr> %vec) { ; CHECK-LABEL: @test.scalable.vectorgep.ult.true( ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, <vscale x 2 x ptr> [[VEC:%.*]], i64 1 -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %gep.1 = getelementptr inbounds i32, <vscale x 2 x ptr> %vec, i64 1 %t.1 = icmp ult <vscale x 2 x ptr> %vec, %gep.1 diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll index 7060b42..5c6dead 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll @@ -87,7 +87,7 @@ define <vscale x 2 x i16> @infer_nowrap_scalable(<vscale x 2 x i8> %a) { ; CHECK-LABEL: define range(i16 1, 257) <vscale x 2 x i16> @infer_nowrap_scalable( ; CHECK-SAME: <vscale x 2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <vscale x 2 x i8> [[A]] to <vscale x 2 x i16> -; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <vscale x 2 x i16> [[ZEXT]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 1, i64 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <vscale x 2 x i16> [[ZEXT]], splat (i16 1) ; CHECK-NEXT: ret <vscale x 2 x i16> [[RES]] ; %zext = zext <vscale x 2 x i8> %a to <vscale x 2 x i16> diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll index d6434ad..98f6bf7 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll @@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu" define <vscale x 8 x i16> @srshl_abs_undef_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 { ; CHECK-LABEL: @srshl_abs_undef_merge( ; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) @@ -18,7 +18,7 @@ define <vscale x 8 x i16> @srshl_abs_undef_merge(<vscale x 8 x i16> %a, <vscale define <vscale x 8 x i16> @srshl_abs_zero_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 { ; CHECK-LABEL: @srshl_abs_zero_merge( ; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) @@ -29,8 +29,8 @@ define <vscale x 8 x i16> @srshl_abs_zero_merge(<vscale x 8 x i16> %a, <vscale x define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 { ; CHECK-LABEL: @srshl_abs_positive_merge( -; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> splat (i16 2), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2) @@ -44,7 +44,7 @@ define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vsc ; CHECK-LABEL: @srshl_abs_all_active_pred( ; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -57,7 +57,7 @@ define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vsc define <vscale x 8 x i16> @srshl_abs_same_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg) #0 { ; CHECK-LABEL: @srshl_abs_same_pred( ; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) @@ -69,7 +69,7 @@ define <vscale x 8 x i16> @srshl_abs_same_pred(<vscale x 8 x i16> %a, <vscale x define <vscale x 8 x i16> @srshl_sqabs(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 { ; CHECK-LABEL: @srshl_sqabs( ; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) @@ -80,8 +80,8 @@ define <vscale x 8 x i16> @srshl_sqabs(<vscale x 8 x i16> %a, <vscale x 8 x i1> define <vscale x 8 x i16> @srshl_abs_negative_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 { ; CHECK-LABEL: @srshl_abs_negative_merge( -; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> splat (i16 -1), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) +; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[SHR]] ; %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1) @@ -94,7 +94,7 @@ define <vscale x 8 x i16> @srshl_abs_negative_merge(<vscale x 8 x i16> %a, <vsca define <vscale x 8 x i16> @srshl_abs_nonconst_merge(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 { ; CHECK-LABEL: @srshl_abs_nonconst_merge( ; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[SHR]] ; %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) @@ -107,7 +107,7 @@ define <vscale x 8 x i16> @srshl_abs_not_all_active_pred(<vscale x 8 x i16> %a, ; CHECK-LABEL: @srshl_abs_not_all_active_pred( ; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8) ; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[SHR]] ; %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8) @@ -120,7 +120,7 @@ define <vscale x 8 x i16> @srshl_abs_not_all_active_pred(<vscale x 8 x i16> %a, define <vscale x 8 x i16> @srshl_abs_diff_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 { ; CHECK-LABEL: @srshl_abs_diff_pred( ; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[SHR]] ; %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) @@ -132,7 +132,7 @@ define <vscale x 8 x i16> @srshl_abs_diff_pred(<vscale x 8 x i16> %a, <vscale x define <vscale x 8 x i16> @srshl_abs_negative_shift(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 { ; CHECK-LABEL: @srshl_abs_negative_shift( ; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) -; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 -2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[SHR]] ; %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll index 51b8816..6f8d8f2 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll @@ -37,7 +37,7 @@ define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale define <vscale x 2 x double> @idempotent_fmul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @idempotent_fmul_different_argument_order( ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[A]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> splat (double 1.000000e+00), <vscale x 2 x double> [[A]]) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0) @@ -61,7 +61,7 @@ define <vscale x 8 x half> @idempotent_fmul_two_dups(<vscale x 8 x i1> %pg, <vsc ; together is sane. ; CHECK-LABEL: define <vscale x 8 x half> @idempotent_fmul_two_dups( ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: ret <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH3C00, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 8 x half> splat (half 0xH3C00) ; %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0) %2 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0) @@ -73,7 +73,7 @@ define <vscale x 8 x half> @idempotent_fmul_two_dups(<vscale x 8 x i1> %pg, <vsc define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @non_idempotent_fmul_f16( ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[A]], <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH4000, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[A]], <vscale x 8 x half> splat (half 0xH4000)) ; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 2.0) @@ -84,7 +84,7 @@ define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vsca define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @non_idempotent_fmul_f32( ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[A]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[A]], <vscale x 4 x float> splat (float 2.000000e+00)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.0) @@ -95,7 +95,7 @@ define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vsc define <vscale x 2 x double> @non_idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @non_idempotent_fmul_f64( ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[A]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 2.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[A]], <vscale x 2 x double> splat (double 2.000000e+00)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.0) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul_u-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul_u-idempotency.ll index 5ad0731..8278838 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul_u-idempotency.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul_u-idempotency.ll @@ -37,7 +37,7 @@ define <vscale x 2 x double> @idempotent_fmul_u_f64(<vscale x 2 x i1> %pg, <vsca define <vscale x 2 x double> @idempotent_fmul_u_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @idempotent_fmul_u_different_argument_order( ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[A]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> splat (double 1.000000e+00), <vscale x 2 x double> [[A]]) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0) @@ -61,7 +61,7 @@ define <vscale x 8 x half> @idempotent_fmul_u_two_dups(<vscale x 8 x i1> %pg, <v ; together is sane. ; CHECK-LABEL: define <vscale x 8 x half> @idempotent_fmul_u_two_dups( ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: ret <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH3C00, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 8 x half> splat (half 0xH3C00) ; %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0) %2 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0) @@ -73,7 +73,7 @@ define <vscale x 8 x half> @idempotent_fmul_u_two_dups(<vscale x 8 x i1> %pg, <v define <vscale x 8 x half> @non_idempotent_fmul_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; CHECK-LABEL: define <vscale x 8 x half> @non_idempotent_fmul_u_f16( ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[A]], <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH4000, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[A]], <vscale x 8 x half> splat (half 0xH4000)) ; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] ; %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 2.0) @@ -84,7 +84,7 @@ define <vscale x 8 x half> @non_idempotent_fmul_u_f16(<vscale x 8 x i1> %pg, <vs define <vscale x 4 x float> @non_idempotent_fmul_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; CHECK-LABEL: define <vscale x 4 x float> @non_idempotent_fmul_u_f32( ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[A]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[A]], <vscale x 4 x float> splat (float 2.000000e+00)) ; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] ; %1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.0) @@ -95,7 +95,7 @@ define <vscale x 4 x float> @non_idempotent_fmul_u_f32(<vscale x 4 x i1> %pg, <v define <vscale x 2 x double> @non_idempotent_fmul_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; CHECK-LABEL: define <vscale x 2 x double> @non_idempotent_fmul_u_f64( ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[A]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 2.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[A]], <vscale x 2 x double> splat (double 2.000000e+00)) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.0) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-insr.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-insr.ll index e8489c5..979b834 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-insr.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-insr.ll @@ -17,7 +17,7 @@ define <vscale x 16 x i8> @insr_val_into_splatted_val_int(i8 %a) #0 { define <vscale x 8 x i16> @insr_five_into_fives() #0 { ; CHECK-LABEL: @insr_five_into_fives( -; CHECK-NEXT: ret <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 5, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 8 x i16> splat (i16 5) ; %t1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> splat (i16 5), i16 5) ret <vscale x 8 x i16> %t1 @@ -58,7 +58,7 @@ define <vscale x 16 x i8> @insr_val_into_splatted_other(i8 %a, i8 %b) #0 { define <vscale x 8 x i16> @insr_three_into_fives() #0 { ; CHECK-LABEL: @insr_three_into_fives( -; CHECK-NEXT: [[T1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 5, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), i16 3) +; CHECK-NEXT: [[T1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> splat (i16 5), i16 3) ; CHECK-NEXT: ret <vscale x 8 x i16> [[T1]] ; %t1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> splat (i16 5), i16 3) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll index 83018200..08079b7 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll @@ -37,7 +37,7 @@ define <vscale x 2 x i64> @idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 define <vscale x 2 x i64> @idempotent_mul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @idempotent_mul_different_argument_order( ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> [[A]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> [[A]]) ; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1) @@ -61,7 +61,7 @@ define <vscale x 8 x i16> @idempotent_mul_two_dups(<vscale x 8 x i1> %pg, <vscal ; together is sane. ; CHECK-LABEL: define <vscale x 8 x i16> @idempotent_mul_two_dups( ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: ret <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 8 x i16> splat (i16 1) ; %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1) %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1) @@ -73,7 +73,7 @@ define <vscale x 8 x i16> @idempotent_mul_two_dups(<vscale x 8 x i1> %pg, <vscal define <vscale x 8 x i16> @non_idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @non_idempotent_mul_i16( ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2) @@ -84,7 +84,7 @@ define <vscale x 8 x i16> @non_idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale define <vscale x 4 x i32> @non_idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @non_idempotent_mul_i32( ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> splat (i32 2)) ; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 2) @@ -95,7 +95,7 @@ define <vscale x 4 x i32> @non_idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale define <vscale x 2 x i64> @non_idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @non_idempotent_mul_i64( ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> splat (i64 2)) ; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 2) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul_u-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul_u-idempotency.ll index 2e7475d..662b060 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul_u-idempotency.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul_u-idempotency.ll @@ -37,7 +37,7 @@ define <vscale x 2 x i64> @idempotent_mul_u_i64(<vscale x 2 x i1> %pg, <vscale x define <vscale x 2 x i64> @idempotent_mul_u_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @idempotent_mul_u_different_argument_order( ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> [[A]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> [[A]]) ; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1) @@ -61,7 +61,7 @@ define <vscale x 8 x i16> @idempotent_mul_u_two_dups(<vscale x 8 x i1> %pg, <vsc ; together is sane. ; CHECK-LABEL: define <vscale x 8 x i16> @idempotent_mul_u_two_dups( ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: ret <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 8 x i16> splat (i16 1) ; %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1) %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1) @@ -73,7 +73,7 @@ define <vscale x 8 x i16> @idempotent_mul_u_two_dups(<vscale x 8 x i1> %pg, <vsc define <vscale x 8 x i16> @non_idempotent_mul_u_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 { ; CHECK-LABEL: define <vscale x 8 x i16> @non_idempotent_mul_u_i16( ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> splat (i16 2)) ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] ; %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2) @@ -84,7 +84,7 @@ define <vscale x 8 x i16> @non_idempotent_mul_u_i16(<vscale x 8 x i1> %pg, <vsca define <vscale x 4 x i32> @non_idempotent_mul_u_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @non_idempotent_mul_u_i32( ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> splat (i32 2)) ; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] ; %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 2) @@ -95,7 +95,7 @@ define <vscale x 4 x i32> @non_idempotent_mul_u_i32(<vscale x 4 x i1> %pg, <vsca define <vscale x 2 x i64> @non_idempotent_mul_u_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 { ; CHECK-LABEL: define <vscale x 2 x i64> @non_idempotent_mul_u_i64( ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> splat (i64 2)) ; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] ; %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 2) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll index cc184b7..5ddb978 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll @@ -43,7 +43,7 @@ define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a, <vscale x 2 x i1> define <vscale x 4 x i32> @sdiv_i32_not_base2(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 { ; CHECK-LABEL: @sdiv_i32_not_base2( -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388607, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> splat (i32 8388607)) ; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]] ; %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat (i32 8388607)) @@ -52,7 +52,7 @@ define <vscale x 4 x i32> @sdiv_i32_not_base2(<vscale x 4 x i32> %a, <vscale x 4 define <vscale x 4 x i32> @sdiv_i32_not_base2_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 { ; CHECK-LABEL: @sdiv_i32_not_base2_neg( -; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388607, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> splat (i32 -8388607)) ; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]] ; %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat (i32 -8388607)) @@ -83,7 +83,7 @@ define <vscale x 2 x i64> @divide_by_1(<vscale x 16 x i1> %p, <vscale x 2 x i64> define <vscale x 2 x i64> @divide_by_m1(<vscale x 16 x i1> %p, <vscale x 2 x i64> %a) #0 { ; CHECK-LABEL: @divide_by_m1( ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[P:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> splat (i64 -1)) ; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] ; %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 -1) diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index 4825e58..222f87f 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -2683,8 +2683,8 @@ define i16 @add_sub_zext_constant(i8 %x) { define <vscale x 1 x i32> @add_to_or_scalable(<vscale x 1 x i32> %in) { ; CHECK-LABEL: @add_to_or_scalable( -; CHECK-NEXT: [[SHL:%.*]] = shl <vscale x 1 x i32> [[IN:%.*]], shufflevector (<vscale x 1 x i32> insertelement (<vscale x 1 x i32> poison, i32 1, i64 0), <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer) -; CHECK-NEXT: [[ADD:%.*]] = or disjoint <vscale x 1 x i32> [[SHL]], shufflevector (<vscale x 1 x i32> insertelement (<vscale x 1 x i32> poison, i32 1, i64 0), <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: [[SHL:%.*]] = shl <vscale x 1 x i32> [[IN:%.*]], splat (i32 1) +; CHECK-NEXT: [[ADD:%.*]] = or disjoint <vscale x 1 x i32> [[SHL]], splat (i32 1) ; CHECK-NEXT: ret <vscale x 1 x i32> [[ADD]] ; %shl = shl <vscale x 1 x i32> %in, splat (i32 1) diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll index 6344966..7e93612 100644 --- a/llvm/test/Transforms/InstCombine/div.ll +++ b/llvm/test/Transforms/InstCombine/div.ll @@ -1062,7 +1062,7 @@ define <2 x i8> @sdiv_by_negconst_v2i8(<2 x i8> %x) { define <vscale x 2 x i8> @sdiv_by_negconst_nxv2i8(<vscale x 2 x i8> %x) { ; CHECK-LABEL: @sdiv_by_negconst_nxv2i8( -; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <vscale x 2 x i8> [[X:%.*]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 108, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <vscale x 2 x i8> [[X:%.*]], splat (i8 108) ; CHECK-NEXT: ret <vscale x 2 x i8> [[DIV_NEG]] ; %div = sdiv <vscale x 2 x i8> %x, splat (i8 -108) @@ -1083,7 +1083,7 @@ define <2 x i8> @sdiv_by_minSigned_v2i8(<2 x i8> %x) { define <vscale x 2 x i8> @sdiv_by_minSigned_nxv2i8(<vscale x 2 x i8> %x) { ; CHECK-LABEL: @sdiv_by_minSigned_nxv2i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <vscale x 2 x i8> [[X:%.*]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 -128, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <vscale x 2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[DIV_NEG:%.*]] = sext <vscale x 2 x i1> [[TMP1]] to <vscale x 2 x i8> ; CHECK-NEXT: ret <vscale x 2 x i8> [[DIV_NEG]] ; diff --git a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll index c2c7ece..8a52f79 100644 --- a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll +++ b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll @@ -90,7 +90,7 @@ define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(<vscale x 4 x i8> %x) { ; CHECK-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8( ; CHECK-SAME: <vscale x 4 x i8> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = sext <vscale x 4 x i8> [[X]] to <vscale x 4 x i32> -; CHECK-NEXT: [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.ldexp.nxv4f32.nxv4i32(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP1]]) +; CHECK-NEXT: [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.ldexp.nxv4f32.nxv4i32(<vscale x 4 x float> splat (float 1.000000e+00), <vscale x 4 x i32> [[TMP1]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[EXP2]] ; %itofp = sitofp <vscale x 4 x i8> %x to <vscale x 4 x float> diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index 06c78a8..ad187e2 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -87,7 +87,7 @@ define <2 x float> @exact_inverse_splat(<2 x float> %x) { define <vscale x 2 x float> @exact_inverse_scalable_splat(<vscale x 2 x float> %x) { ; CHECK-LABEL: @exact_inverse_scalable_splat( -; CHECK-NEXT: [[DIV:%.*]] = fmul <vscale x 2 x float> [[X:%.*]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 2.500000e-01, i64 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[DIV:%.*]] = fmul <vscale x 2 x float> [[X:%.*]], splat (float 2.500000e-01) ; CHECK-NEXT: ret <vscale x 2 x float> [[DIV]] ; %div = fdiv <vscale x 2 x float> %x, splat (float 4.0) diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll index 51b70ef..70cbb43 100644 --- a/llvm/test/Transforms/InstCombine/fmul.ll +++ b/llvm/test/Transforms/InstCombine/fmul.ll @@ -832,8 +832,8 @@ define <2 x float> @fmul_fadd_distribute_vec(<2 x float> %x) { define <vscale x 2 x float> @fmul_fadd_distribute_scalablevec(<vscale x 2 x float> %x) { ; CHECK-LABEL: @fmul_fadd_distribute_scalablevec( -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <vscale x 2 x float> [[X:%.*]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 6.000000e+03, i64 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[T3:%.*]] = fadd reassoc <vscale x 2 x float> [[TMP1]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 1.200000e+07, i64 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <vscale x 2 x float> [[X:%.*]], splat (float 6.000000e+03) +; CHECK-NEXT: [[T3:%.*]] = fadd reassoc <vscale x 2 x float> [[TMP1]], splat (float 1.200000e+07) ; CHECK-NEXT: ret <vscale x 2 x float> [[T3]] ; %t1 = fadd reassoc <vscale x 2 x float> splat (float 2.0e+3), %x diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll index 6a9b330..9692005 100644 --- a/llvm/test/Transforms/InstCombine/fneg.ll +++ b/llvm/test/Transforms/InstCombine/fneg.ll @@ -1113,7 +1113,7 @@ define float @test_fneg_select_maxnum(float %x) { define <vscale x 2 x double> @test_fneg_select_svec(<vscale x 2 x i1> %cond, <vscale x 2 x double> %b) { ; CHECK-LABEL: @test_fneg_select_svec( ; CHECK-NEXT: [[TMP2:%.*]] = fneg fast <vscale x 2 x double> [[TMP1:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = select fast <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = select fast <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> splat (double -0.000000e+00), <vscale x 2 x double> [[TMP2]] ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP3]] ; %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %b @@ -1124,7 +1124,7 @@ define <vscale x 2 x double> @test_fneg_select_svec(<vscale x 2 x i1> %cond, <vs define <vscale x 2 x double> @test_fneg_select_svec_2(<vscale x 2 x i1> %cond, <vscale x 2 x double> %a) { ; CHECK-LABEL: @test_fneg_select_svec_2( ; CHECK-NEXT: [[A_NEG:%.*]] = fneg fast <vscale x 2 x double> [[A:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = select fast <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> [[A_NEG]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = select fast <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> [[A_NEG]], <vscale x 2 x double> splat (double -0.000000e+00) ; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] ; %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer @@ -1134,7 +1134,7 @@ define <vscale x 2 x double> @test_fneg_select_svec_2(<vscale x 2 x i1> %cond, < define <vscale x 2 x double> @test_fneg_select_svec_3(<vscale x 2 x i1> %cond, <vscale x 2 x double> %b) { ; CHECK-LABEL: @test_fneg_select_svec_3( -; CHECK-NEXT: ret <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x double> splat (double -0.000000e+00) ; %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer %2 = fneg fast <vscale x 2 x double> %1 diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index 8f28049..2ea65eb 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -283,7 +283,7 @@ define <vscale x 2 x i1> @test13_scalable_scalable(i64 %X, ptr %P, <vscale x 2 x ; CHECK-LABEL: @test13_scalable_scalable( ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[A_IDX:%.*]] = shl nsw <vscale x 2 x i64> [[DOTSPLAT]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[A_IDX:%.*]] = shl nsw <vscale x 2 x i64> [[DOTSPLAT]], splat (i64 3) ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP2]], i64 0 diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll index 333b751..e133533 100644 --- a/llvm/test/Transforms/InstCombine/icmp-vec.ll +++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll @@ -394,7 +394,7 @@ define <2 x i1> @icmp_logical_or_vec(<2 x i64> %x, <2 x i64> %y, <2 x i1> %false define <vscale x 2 x i1> @icmp_logical_or_scalablevec(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %falseval) { ; CHECK-LABEL: @icmp_logical_or_scalablevec( ; CHECK-NEXT: [[CMP_NE:%.*]] = icmp ne <vscale x 2 x i64> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[SEL:%.*]] = select <vscale x 2 x i1> [[CMP_NE]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> [[FALSEVAL:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select <vscale x 2 x i1> [[CMP_NE]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i1> [[FALSEVAL:%.*]] ; CHECK-NEXT: ret <vscale x 2 x i1> [[SEL]] ; %cmp.ne = icmp ne <vscale x 2 x i64> %x, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index c9e7cba..9467f50 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -1334,7 +1334,7 @@ define i8 @nonzero_reduce_xor_vscale_even(<vscale x 2 x i8> %xx) { define i8 @nonzero_reduce_xor_vscale_odd_fail(<vscale x 3 x i8> %xx) { ; CHECK-LABEL: @nonzero_reduce_xor_vscale_odd_fail( -; CHECK-NEXT: [[X:%.*]] = or <vscale x 3 x i8> [[XX:%.*]], shufflevector (<vscale x 3 x i8> insertelement (<vscale x 3 x i8> poison, i8 1, i64 0), <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer) +; CHECK-NEXT: [[X:%.*]] = or <vscale x 3 x i8> [[XX:%.*]], splat (i8 1) ; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.xor.nxv3i8(<vscale x 3 x i8> [[X]]) ; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1 ; CHECK-NEXT: ret i8 [[R]] @@ -1349,7 +1349,7 @@ define i8 @nonzero_reduce_xor_vscale_odd_fail(<vscale x 3 x i8> %xx) { define i8 @nonzero_reduce_xor_vscale_even_fail(<vscale x 2 x i8> %xx) { ; CHECK-LABEL: @nonzero_reduce_xor_vscale_even_fail( -; CHECK-NEXT: [[X:%.*]] = or <vscale x 2 x i8> [[XX:%.*]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[X:%.*]] = or <vscale x 2 x i8> [[XX:%.*]], splat (i8 1) ; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> [[X]]) ; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 2 ; CHECK-NEXT: ret i8 [[R]] diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll index b3fa3da..9a5db31 100644 --- a/llvm/test/Transforms/InstCombine/load-store-forward.ll +++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll @@ -103,7 +103,7 @@ define i32 @vec_store_load_overlap(ptr %p) { define i32 @load_i32_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_i32_store_nxv4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: ret i32 [[TMP0]] ; @@ -116,7 +116,7 @@ entry: define i64 @load_i64_store_nxv8i8(ptr %a) { ; CHECK-LABEL: @load_i64_store_nxv8i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 8 x i8> splat (i8 1), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[A]], align 8 ; CHECK-NEXT: ret i64 [[LOAD]] ; @@ -129,7 +129,7 @@ entry: define i64 @load_i64_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_i64_store_nxv4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[A]], align 8 ; CHECK-NEXT: ret i64 [[LOAD]] ; @@ -142,7 +142,7 @@ entry: define i8 @load_i8_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_i8_store_nxv4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[A]], align 1 ; CHECK-NEXT: ret i8 [[LOAD]] ; @@ -155,7 +155,7 @@ entry: define float @load_f32_store_nxv4f32(ptr %a) { ; CHECK-LABEL: @load_f32_store_nxv4f32( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 4 x float> splat (float 1.000000e+00), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A]], align 4 ; CHECK-NEXT: ret float [[TMP0]] ; @@ -168,7 +168,7 @@ entry: define i32 @load_i32_store_nxv4f32(ptr %a) { ; CHECK-LABEL: @load_i32_store_nxv4f32( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 4 x float> splat (float 1.000000e+00), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: ret i32 [[LOAD]] ; @@ -181,7 +181,7 @@ entry: define <4 x i32> @load_v4i32_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_v4i32_store_nxv4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16 ; CHECK-NEXT: ret <4 x i32> [[TMP0]] ; @@ -194,7 +194,7 @@ entry: define <4 x i16> @load_v4i16_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_v4i16_store_nxv4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 16 ; CHECK-NEXT: ret <4 x i16> [[TMP0]] ; @@ -208,7 +208,7 @@ entry: define i64 @load_i64_store_nxv4i8(ptr %a) { ; CHECK-LABEL: @load_i64_store_nxv4i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 4 x i8> shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i64 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 4 x i8> splat (i8 1), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[A]], align 8 ; CHECK-NEXT: ret i64 [[LOAD]] ; @@ -223,7 +223,7 @@ entry: define <vscale x 4 x i8> @load_nxv4i8_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_nxv4i8_store_nxv4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[A:%.*]], align 16 +; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16 ; CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i8>, ptr [[A]], align 16 ; CHECK-NEXT: ret <vscale x 4 x i8> [[TMP0]] ; diff --git a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll index 0c4418a..b8d405e 100644 --- a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll +++ b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll @@ -187,7 +187,7 @@ define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(<2 x i32> %x) { define <vscale x 4 x float> @pow_sitofp_nxv4f32_const_base_2(<vscale x 4 x i32> %x) { ; CHECK-LABEL: define <vscale x 4 x float> @pow_sitofp_nxv4f32_const_base_2( ; CHECK-SAME: <vscale x 4 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[EXP2:%.*]] = tail call <vscale x 4 x float> @llvm.ldexp.nxv4f32.nxv4i32(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[X]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call <vscale x 4 x float> @llvm.ldexp.nxv4f32.nxv4i32(<vscale x 4 x float> splat (float 1.000000e+00), <vscale x 4 x i32> [[X]]) ; CHECK-NEXT: ret <vscale x 4 x float> [[EXP2]] ; %itofp = sitofp <vscale x 4 x i32> %x to <vscale x 4 x float> diff --git a/llvm/test/Transforms/InstCombine/pr83931.ll b/llvm/test/Transforms/InstCombine/pr83931.ll index d36ac8d..0eaf926 100644 --- a/llvm/test/Transforms/InstCombine/pr83931.ll +++ b/llvm/test/Transforms/InstCombine/pr83931.ll @@ -5,7 +5,7 @@ define <vscale x 2 x i1> @dont_crash(<vscale x 2 x i64> %x) { ; CHECK-LABEL: define <vscale x 2 x i1> @dont_crash( ; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RET:%.*]] = icmp sgt <vscale x 2 x i64> [[X]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -309383, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[RET:%.*]] = icmp sgt <vscale x 2 x i64> [[X]], splat (i64 -309383) ; CHECK-NEXT: ret <vscale x 2 x i1> [[RET]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/scalable-const-fp-splat.ll b/llvm/test/Transforms/InstCombine/scalable-const-fp-splat.ll index 00a8153..04204db 100644 --- a/llvm/test/Transforms/InstCombine/scalable-const-fp-splat.ll +++ b/llvm/test/Transforms/InstCombine/scalable-const-fp-splat.ll @@ -4,7 +4,7 @@ define <vscale x 2 x float> @shrink_splat_scalable_extend(<vscale x 2 x float> %a) { ; CHECK-LABEL: define <vscale x 2 x float> @shrink_splat_scalable_extend( ; CHECK-SAME: <vscale x 2 x float> [[A:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = fadd <vscale x 2 x float> [[A]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float -1.000000e+00, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = fadd <vscale x 2 x float> [[A]], splat (float -1.000000e+00) ; CHECK-NEXT: ret <vscale x 2 x float> [[TMP1]] ; %1 = shufflevector <vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float -1.000000e+00, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/scalable-select.ll b/llvm/test/Transforms/InstCombine/scalable-select.ll index eb7a66a..d170fd99 100644 --- a/llvm/test/Transforms/InstCombine/scalable-select.ll +++ b/llvm/test/Transforms/InstCombine/scalable-select.ll @@ -5,7 +5,7 @@ ; Constant::getUniqueInteger would crash for a scalable-vector zeroinitializer. define <vscale x 1 x i32> @select_opt(<vscale x 1 x i32> %b, <vscale x 1 x i1> %m) { ; CHECK-LABEL: @select_opt( -; CHECK-NEXT: [[C:%.*]] = add nsw <vscale x 1 x i32> [[B:%.*]], shufflevector (<vscale x 1 x i32> insertelement (<vscale x 1 x i32> undef, i32 2, i32 0), <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: [[C:%.*]] = add nsw <vscale x 1 x i32> [[B:%.*]], splat (i32 2) ; CHECK-NEXT: [[D:%.*]] = select <vscale x 1 x i1> [[M:%.*]], <vscale x 1 x i32> [[C]], <vscale x 1 x i32> [[B]] ; CHECK-NEXT: ret <vscale x 1 x i32> [[D]] ; diff --git a/llvm/test/Transforms/InstCombine/select-masked_gather.ll b/llvm/test/Transforms/InstCombine/select-masked_gather.ll index a232bdb..911e6b48 100644 --- a/llvm/test/Transforms/InstCombine/select-masked_gather.ll +++ b/llvm/test/Transforms/InstCombine/select-masked_gather.ll @@ -38,7 +38,7 @@ define <vscale x 2 x i32> @masked_gather_and_zero_inactive_3(<vscale x 2 x ptr> ; Remove redundant select when its mask doesn't overlap with the gather mask. define <vscale x 2 x i32> @masked_gather_and_zero_inactive_4(<vscale x 2 x ptr> %ptr, <vscale x 2 x i1> %inv_mask) { ; CHECK-LABEL: @masked_gather_and_zero_inactive_4( -; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[MASK]], <vscale x 2 x i32> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x i32> [[GATHER]] ; @@ -52,7 +52,7 @@ define <vscale x 2 x i32> @masked_gather_and_zero_inactive_4(<vscale x 2 x ptr> ; As above but reuse the gather's existing passthrough. define <vscale x 2 x i32> @masked_gather_and_zero_inactive_5(<vscale x 2 x ptr> %ptr, <vscale x 2 x i1> %inv_mask) { ; CHECK-LABEL: @masked_gather_and_zero_inactive_5( -; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[MASK]], <vscale x 2 x i32> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x i32> [[GATHER]] ; @@ -66,7 +66,7 @@ define <vscale x 2 x i32> @masked_gather_and_zero_inactive_5(<vscale x 2 x ptr> ; No transform when the gather's passthrough cannot be reused or altered. define <vscale x 2 x i32> @masked_gather_and_zero_inactive_6(<vscale x 2 x ptr> %ptr, <vscale x 2 x i1> %inv_mask, <vscale x 2 x i32> %passthrough) { ; CHECK-LABEL: @masked_gather_and_zero_inactive_6( -; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[MASK]], <vscale x 2 x i32> [[PASSTHROUGH:%.*]]) ; CHECK-NEXT: [[MASKED:%.*]] = select <vscale x 2 x i1> [[INV_MASK]], <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> [[GATHER]] ; CHECK-NEXT: ret <vscale x 2 x i32> [[MASKED]] @@ -94,7 +94,7 @@ define <vscale x 2 x i32> @masked_gather_and_zero_inactive_7(<vscale x 2 x ptr> ; gather's inactive lanes and thus the gather's passthrough takes effect. define <vscale x 2 x float> @masked_gather_and_zero_inactive_8(<vscale x 2 x ptr> %ptr, <vscale x 2 x i1> %inv_mask, <vscale x 2 x i1> %cond) { ; CHECK-LABEL: @masked_gather_and_zero_inactive_8( -; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[PG:%.*]] = and <vscale x 2 x i1> [[COND:%.*]], [[MASK]] ; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[PG]], <vscale x 2 x float> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x float> [[GATHER]] diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 9e92d22..a3221d7 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -99,7 +99,7 @@ define <2 x i1> @test9vec(<2 x i1> %C, <2 x i1> %X) { define <vscale x 2 x i1> @test9vvec(<vscale x 2 x i1> %C, <vscale x 2 x i1> %X) { ; CHECK-LABEL: @test9vvec( -; CHECK-NEXT: [[NOT_C:%.*]] = xor <vscale x 2 x i1> [[C:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[NOT_C:%.*]] = xor <vscale x 2 x i1> [[C:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <vscale x 2 x i1> [[NOT_C]], <vscale x 2 x i1> [[X:%.*]], <vscale x 2 x i1> zeroinitializer ; CHECK-NEXT: ret <vscale x 2 x i1> [[R]] ; @@ -3499,7 +3499,7 @@ define i32 @select_cond_not_cond_cond2(i1 %cond) { ; scalable vector splat ConstantExprs. define <vscale x 2 x i32> @and_constant_select_svec(<vscale x 2 x i32> %x, <vscale x 2 x i1> %cond) { ; CHECK-LABEL: @and_constant_select_svec( -; CHECK-NEXT: [[A:%.*]] = and <vscale x 2 x i32> [[X:%.*]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[A:%.*]] = and <vscale x 2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[B:%.*]] = select <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x i32> [[A]], <vscale x 2 x i32> [[X]] ; CHECK-NEXT: ret <vscale x 2 x i32> [[B]] ; @@ -3511,7 +3511,7 @@ define <vscale x 2 x i32> @and_constant_select_svec(<vscale x 2 x i32> %x, <vsca define <vscale x 2 x i32> @scalable_sign_bits(<vscale x 2 x i8> %x) { ; CHECK-LABEL: @scalable_sign_bits( ; CHECK-NEXT: [[A:%.*]] = sext <vscale x 2 x i8> [[X:%.*]] to <vscale x 2 x i32> -; CHECK-NEXT: [[B:%.*]] = shl nsw <vscale x 2 x i32> [[A]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 16, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[B:%.*]] = shl nsw <vscale x 2 x i32> [[A]], splat (i32 16) ; CHECK-NEXT: ret <vscale x 2 x i32> [[B]] ; %a = sext <vscale x 2 x i8> %x to <vscale x 2 x i32> @@ -3521,8 +3521,8 @@ define <vscale x 2 x i32> @scalable_sign_bits(<vscale x 2 x i8> %x) { define <vscale x 2 x i1> @scalable_non_zero(<vscale x 2 x i32> %x) { ; CHECK-LABEL: @scalable_non_zero( -; CHECK-NEXT: [[A:%.*]] = or <vscale x 2 x i32> [[X:%.*]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <vscale x 2 x i32> [[A]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 57, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[A:%.*]] = or <vscale x 2 x i32> [[X:%.*]], splat (i32 1) +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <vscale x 2 x i32> [[A]], splat (i32 57) ; CHECK-NEXT: ret <vscale x 2 x i1> [[CMP]] ; %a = or <vscale x 2 x i32> %x, splat (i32 1) diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index d72a184..2ff3c3c 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -1385,7 +1385,7 @@ define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) { define <vscale x 8 x i8> @ashr_demanded_bits_splat2(<vscale x 8 x i8> %x) { ; CHECK-LABEL: @ashr_demanded_bits_splat2( -; CHECK-NEXT: [[SHR:%.*]] = ashr <vscale x 8 x i8> [[X:%.*]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 7, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[SHR:%.*]] = ashr <vscale x 8 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <vscale x 8 x i8> [[SHR]] ; %and = and <vscale x 8 x i8> %x, splat (i8 128) @@ -1405,7 +1405,7 @@ define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) { define <vscale x 8 x i8> @lshr_demanded_bits_splat2(<vscale x 8 x i8> %x) { ; CHECK-LABEL: @lshr_demanded_bits_splat2( -; CHECK-NEXT: [[SHR:%.*]] = lshr <vscale x 8 x i8> [[X:%.*]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 7, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[SHR:%.*]] = lshr <vscale x 8 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <vscale x 8 x i8> [[SHR]] ; %and = and <vscale x 8 x i8> %x, splat (i8 128) diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index b131300..4fc2254 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -841,7 +841,7 @@ define <2 x i32> @test44vec(<2 x i32> %x) { define <vscale x 2 x i32> @test44scalablevec(<vscale x 2 x i32> %x) { ; CHECK-LABEL: @test44scalablevec( -; CHECK-NEXT: [[SUB:%.*]] = add nsw <vscale x 2 x i32> [[X:%.*]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 -32768, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[SUB:%.*]] = add nsw <vscale x 2 x i32> [[X:%.*]], splat (i32 -32768) ; CHECK-NEXT: ret <vscale x 2 x i32> [[SUB]] ; %sub = sub nsw <vscale x 2 x i32> %x, splat (i32 32768) @@ -861,7 +861,7 @@ define <2 x i16> @test44vecminval(<2 x i16> %x) { ; uses m_ImmConstant which matches Constant but (explicitly) not ConstantExpr. define <vscale x 2 x i16> @test44scalablevecminval(<vscale x 2 x i16> %x) { ; CHECK-LABEL: @test44scalablevecminval( -; CHECK-NEXT: [[SUB:%.*]] = add <vscale x 2 x i16> [[X:%.*]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 -32768, i64 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[SUB:%.*]] = add <vscale x 2 x i16> [[X:%.*]], splat (i16 -32768) ; CHECK-NEXT: ret <vscale x 2 x i16> [[SUB]] ; %sub = sub nsw <vscale x 2 x i16> %x, splat (i16 -32768) diff --git a/llvm/test/Transforms/InstCombine/udiv-simplify.ll b/llvm/test/Transforms/InstCombine/udiv-simplify.ll index 0af3348..9897986 100644 --- a/llvm/test/Transforms/InstCombine/udiv-simplify.ll +++ b/llvm/test/Transforms/InstCombine/udiv-simplify.ll @@ -158,7 +158,7 @@ define i8 @udiv_exact_demanded_low_bits_clear(i8 %a) { define <vscale x 1 x i32> @udiv_demanded3(<vscale x 1 x i32> %a) { ; CHECK-LABEL: @udiv_demanded3( -; CHECK-NEXT: [[U:%.*]] = udiv <vscale x 1 x i32> [[A:%.*]], shufflevector (<vscale x 1 x i32> insertelement (<vscale x 1 x i32> poison, i32 12, i64 0), <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: [[U:%.*]] = udiv <vscale x 1 x i32> [[A:%.*]], splat (i32 12) ; CHECK-NEXT: ret <vscale x 1 x i32> [[U]] ; %o = or <vscale x 1 x i32> %a, splat (i32 3) diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll index 80fe4b7..c6329af16 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll @@ -1485,7 +1485,7 @@ define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) { define <vscale x 4 x i32> @vsplat_assoc_add(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) { ; CHECK-LABEL: @vsplat_assoc_add( -; CHECK-NEXT: [[TMP1:%.*]] = add <vscale x 4 x i32> [[X:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 317426, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = add <vscale x 4 x i32> [[X:%.*]], splat (i32 317426) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add <vscale x 4 x i32> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 9fb68b5..dd9fab7 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -1490,7 +1490,7 @@ define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) { define <vscale x 4 x i32> @vsplat_assoc_add(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) { ; CHECK-LABEL: @vsplat_assoc_add( -; CHECK-NEXT: [[TMP1:%.*]] = add <vscale x 4 x i32> [[X:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 317426, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = add <vscale x 4 x i32> [[X:%.*]], splat (i32 317426) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = add <vscale x 4 x i32> [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] diff --git a/llvm/test/Transforms/InstCombine/vscale_cmp.ll b/llvm/test/Transforms/InstCombine/vscale_cmp.ll index b2bfc93da..dfbf94c 100644 --- a/llvm/test/Transforms/InstCombine/vscale_cmp.ll +++ b/llvm/test/Transforms/InstCombine/vscale_cmp.ll @@ -3,7 +3,7 @@ define <vscale x 2 x i1> @sge(<vscale x 2 x i8> %x) { ; CHECK-LABEL: @sge( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <vscale x 2 x i8> [[X:%.*]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 -1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <vscale x 2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <vscale x 2 x i1> [[CMP]] ; %cmp = icmp sge <vscale x 2 x i8> %x, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll index 97412d6..75930c1 100644 --- a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll +++ b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll @@ -42,7 +42,7 @@ define <2 x i8> @trunc_ctlz_zext_v2i8_v2i33(<2 x i8> %x) { define <vscale x 2 x i16> @trunc_ctlz_zext_nxv2i16_nxv2i64(<vscale x 2 x i16> %x) { ; CHECK-LABEL: @trunc_ctlz_zext_nxv2i16_nxv2i64( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i16 0, 17) <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> [[X:%.*]], i1 false) -; CHECK-NEXT: [[ZZ:%.*]] = add nuw nsw <vscale x 2 x i16> [[TMP1]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 48, i64 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[ZZ:%.*]] = add nuw nsw <vscale x 2 x i16> [[TMP1]], splat (i16 48) ; CHECK-NEXT: ret <vscale x 2 x i16> [[ZZ]] ; %z = zext <vscale x 2 x i16> %x to <vscale x 2 x i64> @@ -74,7 +74,7 @@ define <vscale x 2 x i16> @trunc_ctlz_zext_nxv2i16_nxv2i63_multiple_uses(<vscale ; CHECK-LABEL: @trunc_ctlz_zext_nxv2i16_nxv2i63_multiple_uses( ; CHECK-NEXT: [[Z:%.*]] = zext <vscale x 2 x i16> [[X:%.*]] to <vscale x 2 x i63> ; CHECK-NEXT: [[TMP1:%.*]] = call range(i16 0, 17) <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> [[X]], i1 true) -; CHECK-NEXT: [[ZZ:%.*]] = add nuw nsw <vscale x 2 x i16> [[TMP1]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 47, i64 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[ZZ:%.*]] = add nuw nsw <vscale x 2 x i16> [[TMP1]], splat (i16 47) ; CHECK-NEXT: call void @use1(<vscale x 2 x i63> [[Z]]) ; CHECK-NEXT: ret <vscale x 2 x i16> [[ZZ]] ; diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/extractelement-vscale.ll b/llvm/test/Transforms/InstSimplify/ConstProp/extractelement-vscale.ll index de2ee65..b12fb35 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/extractelement-vscale.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/extractelement-vscale.ll @@ -31,7 +31,7 @@ define i32 @extractconstant_shuffle_in_range(i32 %v) { define i32 @extractconstant_shuffle_maybe_out_of_range(i32 %v) { ; CHECK-LABEL: define i32 @extractconstant_shuffle_maybe_out_of_range( ; CHECK-SAME: i32 [[V:%.*]]) { -; CHECK-NEXT: ret i32 extractelement (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1024, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer), i32 4) +; CHECK-NEXT: ret i32 extractelement (<vscale x 4 x i32> splat (i32 1024), i32 4) ; %in = insertelement <vscale x 4 x i32> undef, i32 1024, i32 0 %splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer @@ -42,7 +42,7 @@ define i32 @extractconstant_shuffle_maybe_out_of_range(i32 %v) { define i32 @extractconstant_shuffle_invalid_index(i32 %v) { ; CHECK-LABEL: define i32 @extractconstant_shuffle_invalid_index( ; CHECK-SAME: i32 [[V:%.*]]) { -; CHECK-NEXT: ret i32 extractelement (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1024, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer), i32 -1) +; CHECK-NEXT: ret i32 extractelement (<vscale x 4 x i32> splat (i32 1024), i32 -1) ; %in = insertelement <vscale x 4 x i32> undef, i32 1024, i32 0 %splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll index e1c46a6..a38dfaf 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll @@ -43,7 +43,7 @@ define <vscale x 4 x i32> @sub() { define <vscale x 4 x i32> @sub_splat() { ; CHECK-LABEL: @sub_splat( -; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -16, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 -16) ; %r = sub <vscale x 4 x i32> zeroinitializer, splat (i32 16) ret <vscale x 4 x i32> %r @@ -195,7 +195,7 @@ define <vscale x 4 x i32> @insertelement() { define <vscale x 4 x i32> @shufflevector() { ; CHECK-LABEL: @shufflevector( -; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 1) ; %i = insertelement <vscale x 4 x i32> poison, i32 1, i32 0 %i2 = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer @@ -208,7 +208,7 @@ define <vscale x 4 x i32> @shufflevector() { define <vscale x 4 x float> @bitcast() { ; CHECK-LABEL: @bitcast( -; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x float>) +; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> splat (i32 1) to <vscale x 4 x float>) ; %i1 = insertelement <vscale x 4 x i32> poison, i32 1, i32 0 %i2 = shufflevector <vscale x 4 x i32> %i1, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer @@ -249,7 +249,7 @@ define <vscale x 4 x i1> @icmp_undef() { define <vscale x 4 x i1> @icmp_zero() { ; CHECK-LABEL: @icmp_zero( -; CHECK-NEXT: ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i1> splat (i1 true) ; %r = icmp eq <vscale x 4 x i32> zeroinitializer, zeroinitializer ret <vscale x 4 x i1> %r @@ -257,7 +257,7 @@ define <vscale x 4 x i1> @icmp_zero() { define <vscale x 4 x i1> @fcmp_true() { ; CHECK-LABEL: @fcmp_true( -; CHECK-NEXT: ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i1> splat (i1 true) ; %r = fcmp true <vscale x 4 x float> undef, undef ret <vscale x 4 x i1> %r @@ -281,7 +281,7 @@ define <vscale x 4 x i1> @fcmp_undef() { define <vscale x 4 x i1> @fcmp_not_equality() { ; CHECK-LABEL: @fcmp_not_equality( -; CHECK-NEXT: ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i1> splat (i1 true) ; %r = icmp ule <vscale x 4 x i32> undef, zeroinitializer ret <vscale x 4 x i1> %r diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll index 912180e..aa37cdd 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll @@ -18,7 +18,7 @@ target triple = "aarch64" define <vscale x 8 x i1> @vscale_version() { ; CHECK-LABEL: define <vscale x 8 x i1> @vscale_version() { -; CHECK-NEXT: ret <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 8 x i1> splat (i1 true) ; %splatter = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 %foo = shufflevector <vscale x 8 x i1> %splatter, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll index 6b88b1d..eb0dd69 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll @@ -18,7 +18,7 @@ target triple = "aarch64" define <vscale x 8 x i1> @vscale_version() { ; CHECK-LABEL: define <vscale x 8 x i1> @vscale_version() { -; CHECK-NEXT: ret <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 8 x i1> splat (i1 true) ; %splatter = insertelement <vscale x 8 x i1> undef, i1 true, i32 0 %foo = shufflevector <vscale x 8 x i1> %splatter, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll index 8ace257..e24f574 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll @@ -43,7 +43,7 @@ define <vscale x 4 x i32> @sub() { define <vscale x 4 x i32> @sub_splat() { ; CHECK-LABEL: @sub_splat( -; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -16, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 -16) ; %r = sub <vscale x 4 x i32> zeroinitializer, splat (i32 16) ret <vscale x 4 x i32> %r @@ -195,7 +195,7 @@ define <vscale x 4 x i32> @insertelement() { define <vscale x 4 x i32> @shufflevector() { ; CHECK-LABEL: @shufflevector( -; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 1) ; %i = insertelement <vscale x 4 x i32> undef, i32 1, i32 0 %i2 = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer @@ -208,7 +208,7 @@ define <vscale x 4 x i32> @shufflevector() { define <vscale x 4 x float> @bitcast() { ; CHECK-LABEL: @bitcast( -; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x float>) +; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> splat (i32 1) to <vscale x 4 x float>) ; %i1 = insertelement <vscale x 4 x i32> undef, i32 1, i32 0 %i2 = shufflevector <vscale x 4 x i32> %i1, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer @@ -249,7 +249,7 @@ define <vscale x 4 x i1> @icmp_undef() { define <vscale x 4 x i1> @icmp_zero() { ; CHECK-LABEL: @icmp_zero( -; CHECK-NEXT: ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i1> splat (i1 true) ; %r = icmp eq <vscale x 4 x i32> zeroinitializer, zeroinitializer ret <vscale x 4 x i1> %r @@ -257,7 +257,7 @@ define <vscale x 4 x i1> @icmp_zero() { define <vscale x 4 x i1> @fcmp_true() { ; CHECK-LABEL: @fcmp_true( -; CHECK-NEXT: ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i1> splat (i1 true) ; %r = fcmp true <vscale x 4 x float> undef, undef ret <vscale x 4 x i1> %r @@ -281,7 +281,7 @@ define <vscale x 4 x i1> @fcmp_undef() { define <vscale x 4 x i1> @fcmp_not_equality() { ; CHECK-LABEL: @fcmp_not_equality( -; CHECK-NEXT: ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i1> splat (i1 true) ; %r = icmp ule <vscale x 4 x i32> undef, zeroinitializer ret <vscale x 4 x i1> %r diff --git a/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll b/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll index f9cad7e..e3e31cb 100644 --- a/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll +++ b/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll @@ -13,7 +13,7 @@ define <2 x i1> @i32cmp_eq_fixed_zero() { define <vscale x 2 x i1> @i32cmp_eq_scalable_zero() { ; CHECK-LABEL: @i32cmp_eq_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp eq <vscale x 2 x i32> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -29,7 +29,7 @@ define <2 x i1> @i32cmp_eq_fixed_one() { define <vscale x 2 x i1> @i32cmp_eq_scalable_one() { ; CHECK-LABEL: @i32cmp_eq_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp eq <vscale x 2 x i32> splat (i32 1), splat (i32 1) ret <vscale x 2 x i1> %res @@ -109,7 +109,7 @@ define <2 x i1> @i32cmp_uge_fixed_zero() { define <vscale x 2 x i1> @i32cmp_uge_scalable_zero() { ; CHECK-LABEL: @i32cmp_uge_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp uge <vscale x 2 x i32> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -125,7 +125,7 @@ define <2 x i1> @i32cmp_uge_fixed_one() { define <vscale x 2 x i1> @i32cmp_uge_scalable_one() { ; CHECK-LABEL: @i32cmp_uge_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp uge <vscale x 2 x i32> splat (i32 1), splat (i32 1) ret <vscale x 2 x i1> %res @@ -173,7 +173,7 @@ define <2 x i1> @i32cmp_ule_fixed_zero() { define <vscale x 2 x i1> @i32cmp_ule_scalable_zero() { ; CHECK-LABEL: @i32cmp_ule_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp ule <vscale x 2 x i32> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -189,7 +189,7 @@ define <2 x i1> @i32cmp_ule_fixed_one() { define <vscale x 2 x i1> @i32cmp_ule_scalable_one() { ; CHECK-LABEL: @i32cmp_ule_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp ule <vscale x 2 x i32> splat (i32 1), splat (i32 1) ret <vscale x 2 x i1> %res @@ -237,7 +237,7 @@ define <2 x i1> @i32cmp_sge_fixed_zero() { define <vscale x 2 x i1> @i32cmp_sge_scalable_zero() { ; CHECK-LABEL: @i32cmp_sge_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp sge <vscale x 2 x i32> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -253,7 +253,7 @@ define <2 x i1> @i32cmp_sge_fixed_one() { define <vscale x 2 x i1> @i32cmp_sge_scalable_one() { ; CHECK-LABEL: @i32cmp_sge_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp sge <vscale x 2 x i32> splat (i32 1), splat (i32 1) ret <vscale x 2 x i1> %res @@ -301,7 +301,7 @@ define <2 x i1> @i32cmp_sle_fixed_zero() { define <vscale x 2 x i1> @i32cmp_sle_scalable_zero() { ; CHECK-LABEL: @i32cmp_sle_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp sle <vscale x 2 x i32> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -317,7 +317,7 @@ define <2 x i1> @i32cmp_sle_fixed_one() { define <vscale x 2 x i1> @i32cmp_sle_scalable_one() { ; CHECK-LABEL: @i32cmp_sle_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = icmp sle <vscale x 2 x i32> splat (i32 1), splat (i32 1) ret <vscale x 2 x i1> %res @@ -365,7 +365,7 @@ define <2 x i1> @floatcmp_oeq_fixed_zero() { define <vscale x 2 x i1> @floatcmp_oeq_scalable_zero() { ; CHECK-LABEL: @floatcmp_oeq_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp oeq <vscale x 2 x float> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -381,7 +381,7 @@ define <2 x i1> @floatcmp_oeq_fixed_one() { define <vscale x 2 x i1> @floatcmp_oeq_scalable_one() { ; CHECK-LABEL: @floatcmp_oeq_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp oeq <vscale x 2 x float> splat (float 1.0), splat (float 1.0) ret <vscale x 2 x i1> %res @@ -429,7 +429,7 @@ define <2 x i1> @floatcmp_oge_fixed_zero() { define <vscale x 2 x i1> @floatcmp_oge_scalable_zero() { ; CHECK-LABEL: @floatcmp_oge_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp oge <vscale x 2 x float> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -445,7 +445,7 @@ define <2 x i1> @floatcmp_oge_fixed_one() { define <vscale x 2 x i1> @floatcmp_oge_scalable_one() { ; CHECK-LABEL: @floatcmp_oge_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp oge <vscale x 2 x float> splat (float 1.0), splat (float 1.0) ret <vscale x 2 x i1> %res @@ -493,7 +493,7 @@ define <2 x i1> @floatcmp_ole_fixed_zero() { define <vscale x 2 x i1> @floatcmp_ole_scalable_zero() { ; CHECK-LABEL: @floatcmp_ole_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp ole <vscale x 2 x float> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -509,7 +509,7 @@ define <2 x i1> @floatcmp_ole_fixed_one() { define <vscale x 2 x i1> @floatcmp_ole_scalable_one() { ; CHECK-LABEL: @floatcmp_ole_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp ole <vscale x 2 x float> splat (float 1.0), splat (float 1.0) ret <vscale x 2 x i1> %res @@ -557,7 +557,7 @@ define <2 x i1> @floatcmp_ord_fixed_zero() { define <vscale x 2 x i1> @floatcmp_ord_scalable_zero() { ; CHECK-LABEL: @floatcmp_ord_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp ord <vscale x 2 x float> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -573,7 +573,7 @@ define <2 x i1> @floatcmp_ord_fixed_one() { define <vscale x 2 x i1> @floatcmp_ord_scalable_one() { ; CHECK-LABEL: @floatcmp_ord_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp ord <vscale x 2 x float> splat (float 1.0), splat (float 1.0) ret <vscale x 2 x i1> %res @@ -589,7 +589,7 @@ define <2 x i1> @floatcmp_ueq_fixed_zero() { define <vscale x 2 x i1> @floatcmp_ueq_scalable_zero() { ; CHECK-LABEL: @floatcmp_ueq_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp ueq <vscale x 2 x float> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -605,7 +605,7 @@ define <2 x i1> @floatcmp_ueq_fixed_one() { define <vscale x 2 x i1> @floatcmp_ueq_scalable_one() { ; CHECK-LABEL: @floatcmp_ueq_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp ueq <vscale x 2 x float> splat (float 1.0), splat (float 1.0) ret <vscale x 2 x i1> %res @@ -653,7 +653,7 @@ define <2 x i1> @floatcmp_uge_fixed_zero() { define <vscale x 2 x i1> @floatcmp_uge_scalable_zero() { ; CHECK-LABEL: @floatcmp_uge_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp uge <vscale x 2 x float> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -669,7 +669,7 @@ define <2 x i1> @floatcmp_uge_fixed_one() { define <vscale x 2 x i1> @floatcmp_uge_scalable_one() { ; CHECK-LABEL: @floatcmp_uge_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp uge <vscale x 2 x float> splat (float 1.0), splat (float 1.0) ret <vscale x 2 x i1> %res @@ -717,7 +717,7 @@ define <2 x i1> @floatcmp_ule_fixed_zero() { define <vscale x 2 x i1> @floatcmp_ule_scalable_zero() { ; CHECK-LABEL: @floatcmp_ule_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp ule <vscale x 2 x float> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -733,7 +733,7 @@ define <2 x i1> @floatcmp_ule_fixed_one() { define <vscale x 2 x i1> @floatcmp_ule_scalable_one() { ; CHECK-LABEL: @floatcmp_ule_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp ule <vscale x 2 x float> splat (float 1.0), splat (float 1.0) ret <vscale x 2 x i1> %res @@ -813,7 +813,7 @@ define <2 x i1> @floatcmp_true_fixed_zero() { define <vscale x 2 x i1> @floatcmp_true_scalable_zero() { ; CHECK-LABEL: @floatcmp_true_scalable_zero( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp true <vscale x 2 x float> zeroinitializer, zeroinitializer ret <vscale x 2 x i1> %res @@ -829,7 +829,7 @@ define <2 x i1> @floatcmp_true_fixed_one() { define <vscale x 2 x i1> @floatcmp_true_scalable_one() { ; CHECK-LABEL: @floatcmp_true_scalable_one( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %res = fcmp true <vscale x 2 x float> splat (float 1.0), splat (float 1.0) ret <vscale x 2 x i1> %res diff --git a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll index 22d01ac..fe3a8c68 100644 --- a/llvm/test/Transforms/InstSimplify/fp-nan.ll +++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll @@ -51,7 +51,7 @@ define <2 x float> @fsub_nan_op1_vec(<2 x float> %x) { define <vscale x 1 x float> @fsub_nan_op1_scalable_vec_0(<vscale x 1 x float> %x) { ; CHECK-LABEL: @fsub_nan_op1_scalable_vec_0( -; CHECK-NEXT: ret <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 0x7FF9000000000000, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x float> splat (float 0x7FF9000000000000) ; %r = fsub <vscale x 1 x float> %x, splat (float 0x7FF1000000000000) ret <vscale x 1 x float> %r @@ -59,7 +59,7 @@ define <vscale x 1 x float> @fsub_nan_op1_scalable_vec_0(<vscale x 1 x float> %x define <vscale x 1 x float> @fsub_nan_op1_scalable_vec_1(<vscale x 1 x float> %x) { ; CHECK-LABEL: @fsub_nan_op1_scalable_vec_1( -; CHECK-NEXT: ret <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 0xFFF9000000000000, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x float> splat (float 0xFFF9000000000000) ; %r = fsub <vscale x 1 x float> %x, splat (float 0xFFF1000000000000) ret <vscale x 1 x float> %r @@ -85,7 +85,7 @@ define <2 x double> @fmul_nan_op0_vec(<2 x double> %x) { define <vscale x 1 x double> @fmul_nan_op0_scalable_vec_0(<vscale x 1 x double> %x) { ; CHECK-LABEL: @fmul_nan_op0_scalable_vec_0( -; CHECK-NEXT: ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0xFFF8000000000001, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x double> splat (double 0xFFF8000000000001) ; %r = fmul <vscale x 1 x double> splat (double 0xFFF0000000000001), %x ret <vscale x 1 x double> %r @@ -93,7 +93,7 @@ define <vscale x 1 x double> @fmul_nan_op0_scalable_vec_0(<vscale x 1 x double> define <vscale x 1 x double> @fmul_nan_op0_scalable_vec_1(<vscale x 1 x double> %x) { ; CHECK-LABEL: @fmul_nan_op0_scalable_vec_1( -; CHECK-NEXT: ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0xFFF8DEADDEADDEAD, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x double> splat (double 0xFFF8DEADDEADDEAD) ; %r = fmul <vscale x 1 x double> splat (double 0xFFF0DEADDEADDEAD), %x ret <vscale x 1 x double> %r @@ -111,7 +111,7 @@ define <2 x float> @fmul_nan_op1(<2 x float> %x) { define <vscale x 1 x double> @fmul_nan_op1_scalable_vec(<vscale x 1 x double> %x) { ; CHECK-LABEL: @fmul_nan_op1_scalable_vec( -; CHECK-NEXT: ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0x7FF8000000000000, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x double> splat (double 0x7FF8000000000000) ; %r = fmul <vscale x 1 x double> %x, splat (double 0x7FF8000000000000) ret <vscale x 1 x double> %r @@ -129,7 +129,7 @@ define <2 x double> @fdiv_nan_op0(<2 x double> %x) { define <vscale x 1 x double> @fdivl_nan_op0_scalable_vec(<vscale x 1 x double> %x) { ; CHECK-LABEL: @fdivl_nan_op0_scalable_vec( -; CHECK-NEXT: ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0xFFF800000000000F, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x double> splat (double 0xFFF800000000000F) ; %r = fdiv <vscale x 1 x double> splat (double 0xFFF800000000000F), %x ret <vscale x 1 x double> %r @@ -147,7 +147,7 @@ define <2 x half> @fdiv_nan_op1(<2 x half> %x) { define <vscale x 1 x half> @fdiv_nan_op1_scalable_vec(<vscale x 1 x half> %x) { ; CHECK-LABEL: @fdiv_nan_op1_scalable_vec( -; CHECK-NEXT: ret <vscale x 1 x half> shufflevector (<vscale x 1 x half> insertelement (<vscale x 1 x half> poison, half 0xH7FFF, i64 0), <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x half> splat (half 0xH7FFF) ; %r = fdiv <vscale x 1 x half> %x, splat (half 0xH7FFF) ret <vscale x 1 x half> %r @@ -220,7 +220,7 @@ define <2 x double> @fneg_nan_2(<2 x double> %x) { define <vscale x 1 x double> @fneg_nan_2_scalable_vec() { ; CHECK-LABEL: @fneg_nan_2_scalable_vec( -; CHECK-NEXT: ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0xFFF9234567890ABC, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x double> splat (double 0xFFF9234567890ABC) ; %r = fsub <vscale x 1 x double> splat (double -0.0), splat (double 0xFFF1234567890ABC) ret <vscale x 1 x double> %r @@ -237,7 +237,7 @@ define <2 x double> @unary_fneg_nan_2(<2 x double> %x) { ; FIXME: This doesn't behave the same way as the fixed-length vectors above define <vscale x 1 x double> @unary_fneg_nan_2_scalable_vec_0() { ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_0( -; CHECK-NEXT: ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0x7FF1234567890ABC, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x double> splat (double 0x7FF1234567890ABC) ; %r = fneg <vscale x 1 x double> splat (double 0xFFF1234567890ABC) ret <vscale x 1 x double> %r @@ -246,7 +246,7 @@ define <vscale x 1 x double> @unary_fneg_nan_2_scalable_vec_0() { ; FIXME: This doesn't behave the same way as the fixed-length vectors above define <vscale x 1 x double> @unary_fneg_nan_2_scalable_vec_1() { ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_1( -; CHECK-NEXT: ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0xFFF0000000000001, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 1 x double> splat (double 0xFFF0000000000001) ; %r = fneg <vscale x 1 x double> splat (double 0x7FF0000000000001) ret <vscale x 1 x double> %r diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll index 2767078..b23494f 100644 --- a/llvm/test/Transforms/InstSimplify/gep.ll +++ b/llvm/test/Transforms/InstSimplify/gep.ll @@ -251,7 +251,7 @@ define <4 x ptr> @vector_idx_mix_scalar_vector() { define <vscale x 4 x ptr> @scalable_idx_scalar() { ; CHECK-LABEL: @scalable_idx_scalar( -; CHECK-NEXT: ret <vscale x 4 x ptr> getelementptr (i32, <vscale x 4 x ptr> zeroinitializer, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: ret <vscale x 4 x ptr> getelementptr (i32, <vscale x 4 x ptr> zeroinitializer, <vscale x 4 x i64> splat (i64 1)) ; %gep = getelementptr i32, <vscale x 4 x ptr> zeroinitializer, i64 1 ret <vscale x 4 x ptr> %gep diff --git a/llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll index bd9650b..70ca39d 100644 --- a/llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll @@ -61,7 +61,7 @@ define <vscale x 4 x i32> @insertelement_inline_to_ret() { define <vscale x 4 x i32> @insertelement_shufflevector_inline_to_ret() { ; CHECK-LABEL: @insertelement_shufflevector_inline_to_ret( -; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 1) ; %i = insertelement <vscale x 4 x i32> poison, i32 1, i32 0 %i2 = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer @@ -132,7 +132,7 @@ define i32 @insert_extract_element_same_vec_idx_4() { define <vscale x 2 x i1> @cmp_le_smax_always_true(<vscale x 2 x i64> %x) { ; CHECK-LABEL: @cmp_le_smax_always_true( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %cmp = icmp sle <vscale x 2 x i64> %x, splat (i64 9223372036854775807) ret <vscale x 2 x i1> %cmp @@ -140,7 +140,7 @@ define <vscale x 2 x i1> @cmp_le_smax_always_true(<vscale x 2 x i64> %x) { define <vscale x 4 x float> @bitcast() { ; CHECK-LABEL: @bitcast( -; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x float>) +; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> splat (i32 1) to <vscale x 4 x float>) ; %i1 = insertelement <vscale x 4 x i32> poison, i32 1, i32 0 %i2 = shufflevector <vscale x 4 x i32> %i1, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstSimplify/vscale.ll b/llvm/test/Transforms/InstSimplify/vscale.ll index 768c5f4..47cd88f 100644 --- a/llvm/test/Transforms/InstSimplify/vscale.ll +++ b/llvm/test/Transforms/InstSimplify/vscale.ll @@ -61,7 +61,7 @@ define <vscale x 4 x i32> @insertelement_inline_to_ret() { define <vscale x 4 x i32> @insertelement_shufflevector_inline_to_ret() { ; CHECK-LABEL: @insertelement_shufflevector_inline_to_ret( -; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 4 x i32> splat (i32 1) ; %i = insertelement <vscale x 4 x i32> undef, i32 1, i32 0 %i2 = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer @@ -144,7 +144,7 @@ entry: define <vscale x 2 x i1> @cmp_le_smax_always_true(<vscale x 2 x i64> %x) { ; CHECK-LABEL: @cmp_le_smax_always_true( -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %cmp = icmp sle <vscale x 2 x i64> %x, splat (i64 9223372036854775807) ret <vscale x 2 x i1> %cmp @@ -152,7 +152,7 @@ define <vscale x 2 x i1> @cmp_le_smax_always_true(<vscale x 2 x i64> %x) { define <vscale x 4 x float> @bitcast() { ; CHECK-LABEL: @bitcast( -; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x float>) +; CHECK-NEXT: ret <vscale x 4 x float> bitcast (<vscale x 4 x i32> splat (i32 1) to <vscale x 4 x float>) ; %i1 = insertelement <vscale x 4 x i32> undef, i32 1, i32 0 %i2 = shufflevector <vscale x 4 x i32> %i1, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll index e5b56eb..436389b 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll @@ -7,7 +7,7 @@ target triple = "aarch64-linux-gnu" define void @deinterleave_nxi8_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxi8_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[LDN]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[LDN]], 1 ; CHECK-NEXT: ret void @@ -22,7 +22,7 @@ define void @deinterleave_nxi8_factor2(ptr %ptr) #0 { define void @deinterleave_nxi16_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxi16_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 1 ; CHECK-NEXT: ret void @@ -37,7 +37,7 @@ define void @deinterleave_nxi16_factor2(ptr %ptr) #0 { define void @deinterleave_nx8xi32_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nx8xi32_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 ; CHECK-NEXT: ret void @@ -52,7 +52,7 @@ define void @deinterleave_nx8xi32_factor2(ptr %ptr) #0 { define void @deinterleave_nxi64_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxi64_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 ; CHECK-NEXT: ret void @@ -67,7 +67,7 @@ define void @deinterleave_nxi64_factor2(ptr %ptr) #0 { define void @deinterleave_nxfloat_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxfloat_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1 ; CHECK-NEXT: ret void @@ -82,7 +82,7 @@ define void @deinterleave_nxfloat_factor2(ptr %ptr) #0 { define void @deinterleave_nxdouble_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxdouble_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 1 ; CHECK-NEXT: ret void @@ -97,7 +97,7 @@ define void @deinterleave_nxdouble_factor2(ptr %ptr) #0 { define void @deinterleave_nxptr_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxptr_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x ptr>, <vscale x 2 x ptr> } @llvm.aarch64.sve.ld2.sret.nxv2p0(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x ptr>, <vscale x 2 x ptr> } @llvm.aarch64.sve.ld2.sret.nxv2p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x ptr>, <vscale x 2 x ptr> } [[LDN]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x ptr>, <vscale x 2 x ptr> } [[LDN]], 1 ; CHECK-NEXT: ret void @@ -112,7 +112,7 @@ define void @deinterleave_nxptr_factor2(ptr %ptr) #0 { define void @interleave_nxi8_factor2(ptr %ptr, <vscale x 16 x i8> %l, <vscale x 16 x i8> %r) #0 { ; CHECK-LABEL: define void @interleave_nxi8_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 16 x i8> [[L:%.*]], <vscale x 16 x i8> [[R:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[L]], <vscale x 16 x i8> [[R]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[L]], <vscale x 16 x i8> [[R]], <vscale x 16 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleave = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %l, <vscale x 16 x i8> %r) @@ -123,7 +123,7 @@ define void @interleave_nxi8_factor2(ptr %ptr, <vscale x 16 x i8> %l, <vscale x define void @interleave_nxi16_factor2(ptr %ptr, <vscale x 8 x i16> %l, <vscale x 8 x i16> %r) #0 { ; CHECK-LABEL: define void @interleave_nxi16_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 8 x i16> [[L:%.*]], <vscale x 8 x i16> [[R:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[L]], <vscale x 8 x i16> [[R]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[L]], <vscale x 8 x i16> [[R]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleave = tail call <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16> %l, <vscale x 8 x i16> %r) @@ -134,7 +134,7 @@ define void @interleave_nxi16_factor2(ptr %ptr, <vscale x 8 x i16> %l, <vscale x define void @interleave_nxi32_factor2(ptr %ptr, <vscale x 4 x i32> %l, <vscale x 4 x i32> %r) #0 { ; CHECK-LABEL: define void @interleave_nxi32_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 4 x i32> [[L:%.*]], <vscale x 4 x i32> [[R:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[L]], <vscale x 4 x i32> [[R]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[L]], <vscale x 4 x i32> [[R]], <vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleave = tail call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %l, <vscale x 4 x i32> %r) @@ -145,7 +145,7 @@ define void @interleave_nxi32_factor2(ptr %ptr, <vscale x 4 x i32> %l, <vscale x define void @interleave_nxi64_factor2(ptr %ptr, <vscale x 2 x i64> %l, <vscale x 2 x i64> %r) #0 { ; CHECK-LABEL: define void @interleave_nxi64_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 2 x i64> [[L:%.*]], <vscale x 2 x i64> [[R:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[L]], <vscale x 2 x i64> [[R]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[L]], <vscale x 2 x i64> [[R]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleave = tail call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> %l, <vscale x 2 x i64> %r) @@ -156,7 +156,7 @@ define void @interleave_nxi64_factor2(ptr %ptr, <vscale x 2 x i64> %l, <vscale x define void @interleave_nxfloat_factor2(ptr %ptr, <vscale x 4 x float> %l, <vscale x 4 x float> %r) #0 { ; CHECK-LABEL: define void @interleave_nxfloat_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 4 x float> [[L:%.*]], <vscale x 4 x float> [[R:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[L]], <vscale x 4 x float> [[R]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[L]], <vscale x 4 x float> [[R]], <vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleave = tail call <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float> %l, <vscale x 4 x float> %r) @@ -167,7 +167,7 @@ define void @interleave_nxfloat_factor2(ptr %ptr, <vscale x 4 x float> %l, <vsca define void @interleave_nxdouble_factor2(ptr %ptr, <vscale x 2 x double> %l, <vscale x 2 x double> %r) #0 { ; CHECK-LABEL: define void @interleave_nxdouble_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 2 x double> [[L:%.*]], <vscale x 2 x double> [[R:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[L]], <vscale x 2 x double> [[R]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[L]], <vscale x 2 x double> [[R]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleave = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %l, <vscale x 2 x double> %r) @@ -178,7 +178,7 @@ define void @interleave_nxdouble_factor2(ptr %ptr, <vscale x 2 x double> %l, <vs define void @interleave_nxptr_factor2(ptr %ptr, <vscale x 2 x ptr> %l, <vscale x 2 x ptr> %r) #0 { ; CHECK-LABEL: define void @interleave_nxptr_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 2 x ptr> [[L:%.*]], <vscale x 2 x ptr> [[R:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2p0(<vscale x 2 x ptr> [[L]], <vscale x 2 x ptr> [[R]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2p0(<vscale x 2 x ptr> [[L]], <vscale x 2 x ptr> [[R]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleave = tail call <vscale x 4 x ptr> @llvm.vector.interleave2.nxv4p0(<vscale x 2 x ptr> %l, <vscale x 2 x ptr> %r) @@ -192,25 +192,25 @@ define void @deinterleave_wide_nxi32_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_wide_nxi32_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[PTR]], i64 0 -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP1]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[PTR]], i64 2 -; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP6]]) +; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP7]], i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP9]], i64 4) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[PTR]], i64 4 -; CHECK-NEXT: [[LDN2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP11]]) +; CHECK-NEXT: [[LDN2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN2]], 0 ; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP12]], i64 8) ; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN2]], 1 ; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP14]], i64 8) ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[PTR]], i64 6 -; CHECK-NEXT: [[LDN3:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP16]]) +; CHECK-NEXT: [[LDN3:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP16]]) ; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN3]], 0 ; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP13]], <vscale x 4 x i32> [[TMP17]], i64 12) ; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN3]], 1 @@ -228,13 +228,13 @@ define void @deinterleave_wide_nxdouble_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_wide_nxdouble_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 0 -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP1]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> splat (i1 true), ptr [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 2 -; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP6]]) +; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> splat (i1 true), ptr [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP7]], i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 1 @@ -254,11 +254,11 @@ define void @interleave_wide_nxdouble_factor2(ptr %ptr, <vscale x 4 x double> %l ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[L]], i64 0) ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[R]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP1]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> splat (i1 true), ptr [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 2 ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[L]], i64 2) ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[R]], i64 2) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x i1> splat (i1 true), ptr [[TMP4]]) ; CHECK-NEXT: ret void ; %interleave = tail call <vscale x 8 x double> @llvm.vector.interleave2.nxv8f64(<vscale x 4 x double> %l, <vscale x 4 x double> %r) diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll index 06ecff6..61a6869 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll @@ -5,7 +5,7 @@ define void @deinterleave4(ptr %src) { ; CHECK-LABEL: define void @deinterleave4 ; CHECK-SAME: (ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[SRC]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[SRC]]) ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2 @@ -34,7 +34,7 @@ define void @wide_deinterleave4(ptr %src) { ; CHECK-LABEL: define void @wide_deinterleave4 ; CHECK-SAME: (ptr [[SRC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[SRC]], i64 0 -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP1]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 @@ -44,7 +44,7 @@ define void @wide_deinterleave4(ptr %src) { ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP8]], i64 0) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[SRC]], i64 4 -; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP10]]) +; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP10]]) ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP11]], i64 4) ; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 1 @@ -75,12 +75,12 @@ define void @wide_deinterleave4(ptr %src) { define void @mix_deinterleave4_deinterleave2(ptr %src) { ; CHECK-LABEL: define void @mix_deinterleave4_deinterleave2 ; CHECK-SAME: (ptr [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[SRC]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[SRC]]) ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 3 -; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[SRC]]) +; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[SRC]]) ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 1 ; CHECK-NEXT: ret void @@ -108,13 +108,13 @@ define void @negative_deinterleave4_test(ptr %src) { ; CHECK-LABEL: define void @negative_deinterleave4_test ; CHECK-SAME: (ptr [[SRC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[SRC]], i64 0 -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP1]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[SRC]], i64 2 -; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP6]]) +; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> splat (i1 true), ptr [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP7]], i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN1]], 1 diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll index ba9bff0..e8d113a 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll @@ -5,7 +5,7 @@ define void @interleave4(ptr %dst, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) { ; CHECK-LABEL: define void @interleave4 ; CHECK-SAME: (ptr [[DST:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], <vscale x 4 x i32> [[D:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]], <vscale x 4 x i32> [[D]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[DST]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]], <vscale x 4 x i32> [[D]], <vscale x 4 x i1> splat (i1 true), ptr [[DST]]) ; CHECK-NEXT: ret void ; %interleaved.half1 = tail call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c) @@ -23,13 +23,13 @@ define void @wide_interleave4(ptr %dst, <vscale x 8 x i32> %a, <vscale x 8 x i32 ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[B]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[C]], i64 0) ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[D]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP1]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i1> splat (i1 true), ptr [[TMP1]]) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[DST]], i64 4 ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[A]], i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[B]], i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[C]], i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[D]], i64 4) -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i32> [[TMP10]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i32> [[TMP10]], <vscale x 4 x i1> splat (i1 true), ptr [[TMP6]]) ; CHECK-NEXT: ret void ; %interleaved.half1 = tail call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %c) @@ -42,8 +42,8 @@ define void @wide_interleave4(ptr %dst, <vscale x 8 x i32> %a, <vscale x 8 x i32 define void @mix_interleave4_interleave2(ptr %dst1, ptr %dst2, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) { ; CHECK-LABEL: define void @mix_interleave4_interleave2 ; CHECK-SAME: (ptr [[DST1:%.*]], ptr [[DST2:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], <vscale x 4 x i32> [[D:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]], <vscale x 4 x i32> [[D]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[DST1]]) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[C]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), ptr [[DST2]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]], <vscale x 4 x i32> [[D]], <vscale x 4 x i1> splat (i1 true), ptr [[DST1]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[C]], <vscale x 4 x i1> splat (i1 true), ptr [[DST2]]) ; CHECK-NEXT: ret void ; %interleaved.half1 = tail call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c) diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll index 8821255..0ba8122 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll @@ -595,13 +595,13 @@ define void @deinterleave_nxptr_factor2(ptr %ptr) #2 { ; CHECK-LABEL: define void @deinterleave_nxptr_factor2( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 0 -; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP1]]) +; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> splat (i1 true), ptr [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 2 -; CHECK-NEXT: [[LDN2:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP6]]) +; CHECK-NEXT: [[LDN2:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> splat (i1 true), ptr [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN2]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP7]], i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN2]], 1 diff --git a/llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll b/llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll index 8cf7610..afc28cf 100644 --- a/llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll +++ b/llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll @@ -39,11 +39,11 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %n) { ; CHECK-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; CHECK: mismatch_vec_loop_inc: @@ -129,11 +129,11 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %n) { ; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; LMUL8-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true) ; LMUL8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) ; LMUL8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) -; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) -; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) ; LMUL8-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; LMUL8-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; LMUL8: mismatch_vec_loop_inc: @@ -215,11 +215,11 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %n) { ; LOOP-DEL-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; LOOP-DEL-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; LOOP-DEL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; LOOP-DEL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; LOOP-DEL-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; LOOP-DEL-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; LOOP-DEL: mismatch_vec_loop_inc: @@ -406,11 +406,11 @@ define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) { ; CHECK-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; CHECK: mismatch_vec_loop_inc: @@ -496,11 +496,11 @@ define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) { ; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; LMUL8-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true) ; LMUL8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) ; LMUL8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) -; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) -; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) ; LMUL8-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; LMUL8-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; LMUL8: mismatch_vec_loop_inc: @@ -582,11 +582,11 @@ define i32 @compare_bytes_signed_wrap(ptr %a, ptr %b, i32 %len, i32 %n) { ; LOOP-DEL-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; LOOP-DEL-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; LOOP-DEL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; LOOP-DEL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; LOOP-DEL-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; LOOP-DEL-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; LOOP-DEL: mismatch_vec_loop_inc: @@ -794,11 +794,11 @@ define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i3 ; CHECK-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; CHECK: mismatch_vec_loop_inc: @@ -895,11 +895,11 @@ define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i3 ; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; LMUL8-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true) ; LMUL8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) ; LMUL8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) -; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) -; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) ; LMUL8-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; LMUL8-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; LMUL8: mismatch_vec_loop_inc: @@ -992,11 +992,11 @@ define i32 @compare_bytes_simple_end_ne_found(ptr %a, ptr %b, ptr %c, ptr %d, i3 ; LOOP-DEL-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; LOOP-DEL-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; LOOP-DEL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; LOOP-DEL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; LOOP-DEL-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; LOOP-DEL-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; LOOP-DEL: mismatch_vec_loop_inc: @@ -1245,11 +1245,11 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) { ; CHECK-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; CHECK: mismatch_vec_loop_inc: @@ -1341,11 +1341,11 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) { ; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; LMUL8-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true) ; LMUL8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP20]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) ; LMUL8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) -; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) -; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP19]]) +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP21]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP19]]) ; LMUL8-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; LMUL8-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; LMUL8: mismatch_vec_loop_inc: @@ -1433,11 +1433,11 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) { ; LOOP-DEL-NEXT: [[AVL:%.*]] = sub nuw nsw i64 [[TMP2]], [[MISMATCH_VECTOR_INDEX]] ; LOOP-DEL-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; LOOP-DEL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[MISMATCH_VECTOR_INDEX]] -; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; LOOP-DEL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]] -; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) -; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) +; LOOP-DEL-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP19]]) ; LOOP-DEL-NEXT: [[TMP22:%.*]] = icmp ne i32 [[FIRST]], [[TMP19]] ; LOOP-DEL-NEXT: br i1 [[TMP22]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; LOOP-DEL: mismatch_vec_loop_inc: @@ -1653,11 +1653,11 @@ define void @compare_bytes_cleanup_block(ptr %src1, ptr %src2) { ; CHECK-NEXT: [[AVL:%.*]] = sub nuw nsw i64 0, [[MISMATCH_VECTOR_INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP16]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP15]]) +; CHECK-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP15]]) ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_VECTOR_INDEX]] -; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP15]]) -; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP15]]) -; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP15]]) +; CHECK-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP15]]) +; CHECK-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> splat (i1 true), i32 [[TMP15]]) +; CHECK-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> splat (i1 true), i32 [[TMP15]]) ; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i32 [[FIRST]], [[TMP15]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; CHECK: mismatch_vec_loop_inc: @@ -1742,11 +1742,11 @@ define void @compare_bytes_cleanup_block(ptr %src1, ptr %src2) { ; LMUL8-NEXT: [[AVL:%.*]] = sub nuw nsw i64 0, [[MISMATCH_VECTOR_INDEX]] ; LMUL8-NEXT: [[TMP15:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 64, i1 true) ; LMUL8-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SRC1]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP16]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP15]]) +; LMUL8-NEXT: [[LHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP16]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP15]]) ; LMUL8-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[SRC2]], i64 [[MISMATCH_VECTOR_INDEX]] -; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP17]], <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP15]]) -; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP15]]) -; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> shufflevector (<vscale x 64 x i1> insertelement (<vscale x 64 x i1> poison, i1 true, i64 0), <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer), i32 [[TMP15]]) +; LMUL8-NEXT: [[RHS_LOAD:%.*]] = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr [[TMP17]], <vscale x 64 x i1> splat (i1 true), i32 [[TMP15]]) +; LMUL8-NEXT: [[MISMATCH_CMP:%.*]] = call <vscale x 64 x i1> @llvm.vp.icmp.nxv64i8(<vscale x 64 x i8> [[LHS_LOAD]], <vscale x 64 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 64 x i1> splat (i1 true), i32 [[TMP15]]) +; LMUL8-NEXT: [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv64i1(<vscale x 64 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 64 x i1> splat (i1 true), i32 [[TMP15]]) ; LMUL8-NEXT: [[TMP18:%.*]] = icmp ne i32 [[FIRST]], [[TMP15]] ; LMUL8-NEXT: br i1 [[TMP18]], label [[MISMATCH_VECTOR_LOOP_FOUND:%.*]], label [[MISMATCH_VECTOR_LOOP_INC]] ; LMUL8: mismatch_vec_loop_inc: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 1f3d9d7..c4559dc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -19,7 +19,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8) ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0 @@ -33,14 +33,14 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP15:%.*]] = lshr <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = trunc <vscale x 4 x i64> [[TMP15]] to <vscale x 4 x i8> ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP16]], ptr [[TMP17]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 8) -; CHECK-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -108,7 +108,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0 @@ -122,14 +122,14 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP15:%.*]] = lshr <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = trunc <vscale x 4 x i64> [[TMP15]] to <vscale x 4 x i8> ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP16]], ptr [[TMP17]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index c6e5832..1acc400 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -673,7 +673,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; PRED-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr [[TMP23]], i32 4, <vscale x 4 x i1> [[TMP21]]), !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP14]]) -; PRED-NEXT: [[TMP24:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; PRED-NEXT: [[TMP24:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP25:%.*]] = extractelement <vscale x 4 x i1> [[TMP24]], i32 0 ; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; PRED: middle.block: @@ -806,13 +806,13 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[TMP12:%.*]] = load i16, ptr [[SRC]], align 2 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[TMP12]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer -; PRED-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 1, i64 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer) +; PRED-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1) ; PRED-NEXT: [[TMP14:%.*]] = uitofp <vscale x 2 x i16> [[TMP13]] to <vscale x 2 x double> ; PRED-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0 ; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]]) -; PRED-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; PRED-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i1> [[TMP16]], i32 0 ; PRED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; PRED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 0b5d65f..5e7b4ca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -126,7 +126,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP16]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP16]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0 @@ -143,7 +143,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = icmp ule <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP22]], <vscale x 2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = select <vscale x 2 x i1> [[TMP23]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP24:%.*]] = select <vscale x 2 x i1> [[TMP23]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]], <vscale x 2 x i64> splat (i64 1) ; CHECK-NEXT: [[TMP25:%.*]] = sdiv <vscale x 2 x i64> [[BROADCAST_SPLAT]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <vscale x 2 x i64> [[TMP25]], i32 0 ; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP26]] to i32 @@ -158,7 +158,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> zeroinitializer, ptr [[TMP35]], i32 8, <vscale x 2 x i1> [[TMP23]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) -; CHECK-NEXT: [[TMP36:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP36:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <vscale x 2 x i1> [[TMP36]], i32 0 ; CHECK-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -253,7 +253,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]]) ; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP16]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP16]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0 @@ -286,7 +286,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP23]], ptr [[TMP39]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) -; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0 ; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 7dcab6d8..fdf6122 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -25,7 +25,7 @@ define void @f1(ptr %A) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 -; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP8]], align 4 +; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll index 2cc0aa2..a3a0c3e7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll @@ -70,7 +70,7 @@ define void @PR34711(ptr %a, ptr %b, ptr %c, i64 %n) #0 { ; CHECK-VF4UF1-LABEL: @PR34711 ; CHECK-VF4UF1: vector.body ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ] -; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> {{.*}}, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison) +; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> {{.*}}, i32 2, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison) ; CHECK-VF4UF1-NEXT: %[[SPLICE:.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> %[[VEC_RECUR]], <vscale x 4 x i16> %[[MGATHER]], i32 -1) ; CHECK-VF4UF1-NEXT: %[[SXT1:.*]] = sext <vscale x 4 x i16> %[[SPLICE]] to <vscale x 4 x i32> ; CHECK-VF4UF1-NEXT: %[[SXT2:.*]] = sext <vscale x 4 x i16> %[[MGATHER]] to <vscale x 4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll index 763b3e0..f666f30 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll @@ -59,14 +59,14 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; SVE-NEXT: br label [[VECTOR_BODY:%.*]] ; SVE: vector.body: ; SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x double> [ insertelement (<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x double> [ insertelement (<vscale x 2 x double> splat (double -0.000000e+00), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; SVE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; SVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP4]] ; SVE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; SVE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP6]], align 4 ; SVE-NEXT: [[TMP7:%.*]] = sext <vscale x 2 x i32> [[WIDE_LOAD]] to <vscale x 2 x i64> ; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], <vscale x 2 x i64> [[TMP7]] -; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP8]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> poison) +; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP8]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> poison) ; SVE-NEXT: [[TMP9]] = fadd <vscale x 2 x double> [[VEC_PHI]], [[WIDE_MASKED_GATHER]] ; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 2f756ab..4faed42 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -57,8 +57,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP31:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i16> ; DEFAULT-NEXT: [[TMP32:%.*]] = or <vscale x 8 x i16> [[TMP28]], [[TMP30]] ; DEFAULT-NEXT: [[TMP33:%.*]] = or <vscale x 8 x i16> [[TMP29]], [[TMP31]] -; DEFAULT-NEXT: [[TMP34:%.*]] = lshr <vscale x 8 x i16> [[TMP32]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>) -; DEFAULT-NEXT: [[TMP35:%.*]] = lshr <vscale x 8 x i16> [[TMP33]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>) +; DEFAULT-NEXT: [[TMP34:%.*]] = lshr <vscale x 8 x i16> [[TMP32]], trunc (<vscale x 8 x i32> splat (i32 1) to <vscale x 8 x i16>) +; DEFAULT-NEXT: [[TMP35:%.*]] = lshr <vscale x 8 x i16> [[TMP33]], trunc (<vscale x 8 x i32> splat (i32 1) to <vscale x 8 x i16>) ; DEFAULT-NEXT: [[TMP36:%.*]] = trunc <vscale x 8 x i16> [[TMP34]] to <vscale x 8 x i8> ; DEFAULT-NEXT: [[TMP37:%.*]] = trunc <vscale x 8 x i16> [[TMP35]] to <vscale x 8 x i8> ; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]] @@ -102,7 +102,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP56:%.*]] = mul <vscale x 4 x i16> [[TMP55]], [[TMP51]] ; DEFAULT-NEXT: [[TMP57:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD11]] to <vscale x 4 x i16> ; DEFAULT-NEXT: [[TMP58:%.*]] = or <vscale x 4 x i16> [[TMP56]], [[TMP57]] -; DEFAULT-NEXT: [[TMP59:%.*]] = lshr <vscale x 4 x i16> [[TMP58]], trunc (<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i16>) +; DEFAULT-NEXT: [[TMP59:%.*]] = lshr <vscale x 4 x i16> [[TMP58]], trunc (<vscale x 4 x i32> splat (i32 1) to <vscale x 4 x i16>) ; DEFAULT-NEXT: [[TMP60:%.*]] = trunc <vscale x 4 x i16> [[TMP59]] to <vscale x 4 x i8> ; DEFAULT-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP52]] ; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP61]], i32 0 @@ -177,14 +177,14 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP21:%.*]] = mul <vscale x 8 x i16> [[TMP20]], [[TMP16]] ; PRED-NEXT: [[TMP22:%.*]] = zext <vscale x 8 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 8 x i16> ; PRED-NEXT: [[TMP23:%.*]] = or <vscale x 8 x i16> [[TMP21]], [[TMP22]] -; PRED-NEXT: [[TMP24:%.*]] = lshr <vscale x 8 x i16> [[TMP23]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>) +; PRED-NEXT: [[TMP24:%.*]] = lshr <vscale x 8 x i16> [[TMP23]], trunc (<vscale x 8 x i32> splat (i32 1) to <vscale x 8 x i16>) ; PRED-NEXT: [[TMP25:%.*]] = trunc <vscale x 8 x i16> [[TMP24]] to <vscale x 8 x i8> ; PRED-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]] ; PRED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0 ; PRED-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP25]], ptr [[TMP27]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP28:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer) +; PRED-NEXT: [[TMP28:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP29:%.*]] = extractelement <vscale x 8 x i1> [[TMP28]], i32 0 ; PRED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index 1ec384b..59b879e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -335,7 +335,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 [[TMP1]]) -; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 16 x i1> [[TMP16]], i32 0 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 5496eed..61bbae0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -24,7 +24,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 -; TFNONE-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; TFNONE-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -70,7 +70,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFCOMMON-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFCOMMON-NEXT: [[TMP9:%.*]] = extractelement <vscale x 2 x i1> [[TMP8]], i32 0 ; TFCOMMON-NEXT: br i1 [[TMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFCOMMON: for.cond.cleanup: @@ -115,7 +115,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i1> [[TMP20]], i32 0 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFA_INTERLEAVE: for.cond.cleanup: @@ -156,9 +156,9 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 -; TFNONE-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 50, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; TFNONE-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 50) ; TFNONE-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> [[TMP7]]) -; TFNONE-NEXT: [[TMP9:%.*]] = xor <vscale x 2 x i1> [[TMP7]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFNONE-NEXT: [[TMP9:%.*]] = xor <vscale x 2 x i1> [[TMP7]], splat (i1 true) ; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP8]] ; TFNONE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP10]], align 8 @@ -206,7 +206,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) -; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 50, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 50) ; TFCOMMON-NEXT: [[TMP7:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer ; TFCOMMON-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP7]]) ; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[TMP8]], <vscale x 2 x i64> zeroinitializer @@ -214,7 +214,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP10:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFCOMMON-NEXT: [[TMP10:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x i1> [[TMP10]], i32 0 ; TFCOMMON-NEXT: br i1 [[TMP11]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFCOMMON: for.cond.cleanup: @@ -245,8 +245,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison) -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 50, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 50, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], splat (i64 50) ; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i1> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP13]]) @@ -265,7 +265,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = add i64 [[INDEX_NEXT]], [[TMP22]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP23]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <vscale x 2 x i1> [[TMP24]], i32 0 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFA_INTERLEAVE: for.cond.cleanup: @@ -317,8 +317,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 -; TFNONE-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 50, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; TFNONE-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[TMP7]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFNONE-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 50) +; TFNONE-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[TMP7]], splat (i1 true) ; TFNONE-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP8]]) ; TFNONE-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> [[TMP7]]) ; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP10]] @@ -371,8 +371,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; TFCOMMON-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFCOMMON-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) -; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 50, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; TFCOMMON-NEXT: [[TMP7:%.*]] = xor <vscale x 2 x i1> [[TMP6]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFCOMMON-NEXT: [[TMP7:%.*]] = xor <vscale x 2 x i1> [[TMP6]], splat (i1 true) ; TFCOMMON-NEXT: [[TMP8:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i1> zeroinitializer ; TFCOMMON-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP8]]) ; TFCOMMON-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer @@ -382,7 +382,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP13:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFCOMMON-NEXT: [[TMP13:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <vscale x 2 x i1> [[TMP13]], i32 0 ; TFCOMMON-NEXT: br i1 [[TMP14]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFCOMMON: for.cond.cleanup: @@ -413,10 +413,10 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) ; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison) -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 50, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 50, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor <vscale x 2 x i1> [[TMP11]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[TMP12]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], splat (i64 50) +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor <vscale x 2 x i1> [[TMP11]], splat (i1 true) +; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[TMP12]], splat (i1 true) ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP15]]) @@ -439,7 +439,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = add i64 [[INDEX_NEXT]], [[TMP28]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP29]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <vscale x 2 x i1> [[TMP30]], i32 0 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP31]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFA_INTERLEAVE: for.cond.cleanup: @@ -665,7 +665,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFALWAYS-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFALWAYS-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFALWAYS-NEXT: [[TMP9:%.*]] = extractelement <vscale x 2 x i1> [[TMP8]], i32 0 ; TFALWAYS-NEXT: br i1 [[TMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFALWAYS: for.cond.cleanup: @@ -693,7 +693,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFFALLBACK-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFFALLBACK-NEXT: [[TMP8:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFFALLBACK-NEXT: [[TMP9:%.*]] = extractelement <vscale x 2 x i1> [[TMP8]], i32 0 ; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TFFALLBACK: for.cond.cleanup: @@ -738,7 +738,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = add i64 [[INDEX_NEXT]], [[TMP18]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP19]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i1> [[TMP20]], i32 0 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFA_INTERLEAVE: for.cond.cleanup: @@ -786,7 +786,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP6]], align 8 ; TFNONE-NEXT: [[TMP7:%.*]] = fmul <vscale x 2 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; TFNONE-NEXT: [[TMP8:%.*]] = fptoui <vscale x 2 x double> [[WIDE_LOAD]] to <vscale x 2 x i64> -; TFNONE-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP8]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; TFNONE-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP8]], <vscale x 2 x i1> splat (i1 true)) ; TFNONE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8 ; TFNONE-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP7]]) @@ -841,11 +841,11 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; TFALWAYS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP8]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer) +; TFALWAYS-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> splat (double -0.000000e+00) ; TFALWAYS-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP10]]) ; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFALWAYS-NEXT: [[TMP12:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFALWAYS-NEXT: [[TMP12:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFALWAYS-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x i1> [[TMP12]], i32 0 ; TFALWAYS-NEXT: br i1 [[TMP13]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFALWAYS: for.cond.cleanup: @@ -876,11 +876,11 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; TFFALLBACK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP8]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer) +; TFFALLBACK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> splat (double -0.000000e+00) ; TFFALLBACK-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP10]]) ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFFALLBACK-NEXT: [[TMP12:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFFALLBACK-NEXT: [[TMP12:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFFALLBACK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x i1> [[TMP12]], i32 0 ; TFFALLBACK-NEXT: br i1 [[TMP13]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TFFALLBACK: for.cond.cleanup: @@ -926,9 +926,9 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr [[TMP17]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr [[TMP20]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x double> splat (double -0.000000e+00) ; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP21]]) -; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> [[TMP12]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> [[TMP12]], <vscale x 2 x double> splat (double -0.000000e+00) ; TFA_INTERLEAVE-NEXT: [[TMP24]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[TMP22]], <vscale x 2 x double> [[TMP23]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() @@ -936,7 +936,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX_NEXT]], [[TMP26]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP27]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <vscale x 2 x i1> [[TMP28]], i32 0 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFA_INTERLEAVE: for.cond.cleanup: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 5e50e5d..0db4799 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -18,39 +18,39 @@ define void @foo() { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP19]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[OUTER_LOOP_LATCH4:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[OUTER_LOOP_LATCH4]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_LATCH:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_LATCH]] ] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison) ; CHECK-NEXT: br label [[INNER_LOOP1:%.*]] ; CHECK: inner_loop1: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP13:%.*]], [[INNER_LOOP1]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x float> [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP12:%.*]], [[INNER_LOOP1]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, <vscale x 4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison) ; CHECK-NEXT: [[TMP12]] = fmul <vscale x 4 x float> [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]] -; CHECK-NEXT: [[TMP13]] = add nuw nsw <vscale x 4 x i64> [[VEC_PHI]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 4 x i64> [[TMP13]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 512, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13]] = add nuw nsw <vscale x 4 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 4 x i64> [[TMP13]], splat (i64 512) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0 -; CHECK-NEXT: br i1 [[TMP15]], label [[OUTER_LOOP_LATCH4]], label [[INNER_LOOP1]] +; CHECK-NEXT: br i1 [[TMP15]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]] ; CHECK: vector.latch: -; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x float> [ [[TMP12]], [[INNER_LOOP1]] ] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI5]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x float> [ [[TMP12]], [[INNER_LOOP1]] ] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI4]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index d18cdc1..72a1bb2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -26,7 +26,7 @@ define void @test_invar_gep(ptr %dst) #0 { ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], splat (i64 1) ; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 1eab166b..307b47c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -25,14 +25,14 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP13]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT6]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP25:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; DEFAULT-NEXT: [[TMP31:%.*]] = shl <vscale x 4 x i32> [[BROADCAST_SPLAT7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; DEFAULT-NEXT: [[TMP25:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT7]], splat (i32 1) +; DEFAULT-NEXT: [[TMP31:%.*]] = shl <vscale x 4 x i32> [[BROADCAST_SPLAT7]], splat (i32 1) ; DEFAULT-NEXT: [[TMP33:%.*]] = or <vscale x 4 x i32> [[TMP25]], [[TMP31]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT8]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP39:%.*]] = or <vscale x 4 x i32> [[BROADCAST_SPLAT9]], [[BROADCAST_SPLAT7]] -; DEFAULT-NEXT: [[TMP41:%.*]] = and <vscale x 4 x i32> [[TMP39]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; DEFAULT-NEXT: [[TMP43:%.*]] = xor <vscale x 4 x i32> [[TMP41]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; DEFAULT-NEXT: [[TMP14:%.*]] = and <vscale x 4 x i32> [[TMP39]], splat (i32 1) +; DEFAULT-NEXT: [[TMP43:%.*]] = xor <vscale x 4 x i32> [[TMP14]], splat (i32 1) ; DEFAULT-NEXT: [[TMP45:%.*]] = zext <vscale x 4 x i32> [[TMP43]] to <vscale x 4 x i64> ; DEFAULT-NEXT: [[TMP61:%.*]] = extractelement <vscale x 4 x i64> [[TMP45]], i32 0 ; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[TMP61]] @@ -62,24 +62,24 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[TMP22:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[TMP20]], i32 -1) ; DEFAULT-NEXT: [[TMP23:%.*]] = or <vscale x 4 x i32> [[TMP21]], [[BROADCAST_SPLAT7]] ; DEFAULT-NEXT: [[TMP24:%.*]] = or <vscale x 4 x i32> [[TMP22]], [[BROADCAST_SPLAT7]] -; DEFAULT-NEXT: [[TMP27:%.*]] = shl <vscale x 4 x i32> [[TMP23]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; DEFAULT-NEXT: [[TMP28:%.*]] = shl <vscale x 4 x i32> [[TMP24]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; DEFAULT-NEXT: [[TMP29:%.*]] = or <vscale x 4 x i32> [[TMP27]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; DEFAULT-NEXT: [[TMP30:%.*]] = or <vscale x 4 x i32> [[TMP28]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; DEFAULT-NEXT: [[TMP34:%.*]] = shl <vscale x 4 x i32> [[TMP23]], splat (i32 1) +; DEFAULT-NEXT: [[TMP32:%.*]] = shl <vscale x 4 x i32> [[TMP24]], splat (i32 1) +; DEFAULT-NEXT: [[TMP29:%.*]] = or <vscale x 4 x i32> [[TMP34]], splat (i32 2) +; DEFAULT-NEXT: [[TMP30:%.*]] = or <vscale x 4 x i32> [[TMP32]], splat (i32 2) ; DEFAULT-NEXT: [[TMP35:%.*]] = or <vscale x 4 x i32> [[TMP33]], [[TMP29]] ; DEFAULT-NEXT: [[TMP36:%.*]] = or <vscale x 4 x i32> [[TMP33]], [[TMP30]] ; DEFAULT-NEXT: [[TMP37:%.*]] = or <vscale x 4 x i32> [[TMP35]], [[BROADCAST_SPLAT7]] ; DEFAULT-NEXT: [[TMP38:%.*]] = or <vscale x 4 x i32> [[TMP36]], [[BROADCAST_SPLAT7]] -; DEFAULT-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP47]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) -; DEFAULT-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP47]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) -; DEFAULT-NEXT: [[TMP49:%.*]] = lshr <vscale x 4 x i32> [[TMP37]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; DEFAULT-NEXT: [[TMP50:%.*]] = lshr <vscale x 4 x i32> [[TMP38]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; DEFAULT-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP47]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) +; DEFAULT-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP47]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) +; DEFAULT-NEXT: [[TMP49:%.*]] = lshr <vscale x 4 x i32> [[TMP37]], splat (i32 1) +; DEFAULT-NEXT: [[TMP50:%.*]] = lshr <vscale x 4 x i32> [[TMP38]], splat (i32 1) ; DEFAULT-NEXT: [[TMP51:%.*]] = zext <vscale x 4 x i32> [[TMP49]] to <vscale x 4 x i64> ; DEFAULT-NEXT: [[TMP52:%.*]] = zext <vscale x 4 x i32> [[TMP50]] to <vscale x 4 x i64> ; DEFAULT-NEXT: [[TMP53:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP51]] ; DEFAULT-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP52]] -; DEFAULT-NEXT: [[WIDE_MASKED_GATHER11:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP53]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) -; DEFAULT-NEXT: [[WIDE_MASKED_GATHER12:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP54]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) +; DEFAULT-NEXT: [[WIDE_MASKED_GATHER11:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP53]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) +; DEFAULT-NEXT: [[WIDE_MASKED_GATHER12:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP54]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) ; DEFAULT-NEXT: [[TMP55:%.*]] = or <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], [[VEC_PHI]] ; DEFAULT-NEXT: [[TMP56:%.*]] = or <vscale x 4 x i32> [[WIDE_MASKED_GATHER10]], [[VEC_PHI3]] ; DEFAULT-NEXT: [[TMP57]] = or <vscale x 4 x i32> [[TMP55]], [[WIDE_MASKED_GATHER11]] @@ -161,14 +161,14 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP19]] ; PRED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -; PRED-NEXT: [[TMP25:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT4]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; PRED-NEXT: [[TMP28:%.*]] = shl <vscale x 4 x i32> [[BROADCAST_SPLAT4]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; PRED-NEXT: [[TMP25:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT4]], splat (i32 1) +; PRED-NEXT: [[TMP28:%.*]] = shl <vscale x 4 x i32> [[BROADCAST_SPLAT4]], splat (i32 1) ; PRED-NEXT: [[TMP29:%.*]] = or <vscale x 4 x i32> [[TMP25]], [[TMP28]] ; PRED-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; PRED-NEXT: [[TMP32:%.*]] = or <vscale x 4 x i32> [[BROADCAST_SPLAT6]], [[BROADCAST_SPLAT4]] -; PRED-NEXT: [[TMP33:%.*]] = and <vscale x 4 x i32> [[TMP32]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; PRED-NEXT: [[TMP34:%.*]] = xor <vscale x 4 x i32> [[TMP33]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; PRED-NEXT: [[TMP26:%.*]] = and <vscale x 4 x i32> [[TMP32]], splat (i32 1) +; PRED-NEXT: [[TMP34:%.*]] = xor <vscale x 4 x i32> [[TMP26]], splat (i32 1) ; PRED-NEXT: [[TMP35:%.*]] = zext <vscale x 4 x i32> [[TMP34]] to <vscale x 4 x i64> ; PRED-NEXT: [[TMP46:%.*]] = extractelement <vscale x 4 x i64> [[TMP35]], i32 0 ; PRED-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[TMP46]] @@ -195,12 +195,12 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP22]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[BROADCAST_SPLAT]], i32 -1) ; PRED-NEXT: [[TMP23:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR2]], <vscale x 4 x i32> [[TMP22]], i32 -1) ; PRED-NEXT: [[TMP24:%.*]] = or <vscale x 4 x i32> [[TMP23]], [[BROADCAST_SPLAT4]] -; PRED-NEXT: [[TMP26:%.*]] = shl <vscale x 4 x i32> [[TMP24]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; PRED-NEXT: [[TMP27:%.*]] = or <vscale x 4 x i32> [[TMP26]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; PRED-NEXT: [[TMP33:%.*]] = shl <vscale x 4 x i32> [[TMP24]], splat (i32 1) +; PRED-NEXT: [[TMP27:%.*]] = or <vscale x 4 x i32> [[TMP33]], splat (i32 2) ; PRED-NEXT: [[TMP30:%.*]] = or <vscale x 4 x i32> [[TMP29]], [[TMP27]] ; PRED-NEXT: [[TMP31:%.*]] = or <vscale x 4 x i32> [[TMP30]], [[BROADCAST_SPLAT4]] ; PRED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP36]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) -; PRED-NEXT: [[TMP37:%.*]] = lshr <vscale x 4 x i32> [[TMP31]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; PRED-NEXT: [[TMP37:%.*]] = lshr <vscale x 4 x i32> [[TMP31]], splat (i32 1) ; PRED-NEXT: [[TMP38:%.*]] = zext <vscale x 4 x i32> [[TMP37]] to <vscale x 4 x i64> ; PRED-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP38]] ; PRED-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP39]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) @@ -209,7 +209,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP42]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[TMP41]], <vscale x 4 x i32> [[VEC_PHI]] ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) -; PRED-NEXT: [[TMP43:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; PRED-NEXT: [[TMP43:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP44:%.*]] = extractelement <vscale x 4 x i1> [[TMP43]], i32 0 ; PRED-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: @@ -396,7 +396,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP16]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i16> [[TMP20]], <vscale x 4 x i16> [[VEC_PHI]] ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) -; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 ; PRED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index 385e83e..41c92d6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -30,7 +30,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 2 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], splat (i32 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll index a55e53c..dabff1b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll @@ -33,7 +33,7 @@ for.end: ; preds = %for.body, %entry define void @vec_scalar(i64 %N, ptr nocapture %a) { ; CHECK-LABEL: @vec_scalar ; CHECK: vector.body: -; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+01, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> splat (double 1.000000e+01)) entry: %cmp7 = icmp sgt i64 %N, 0 br i1 %cmp7, label %for.body, label %for.end diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll index 038a221..c7cc095 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll @@ -24,7 +24,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison) @@ -111,11 +111,11 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison) -; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0x47EFFFFFE0000000, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> splat (float 0x47EFFFFFE0000000) ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP12]]) ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 437d13a..6ecaff0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -47,7 +47,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 @@ -151,11 +151,11 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]] ; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP12]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP13]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractelement <vscale x 8 x i1> [[TMP15]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: @@ -233,10 +233,10 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 @@ -400,13 +400,13 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP21]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> poison) ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP24]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> poison) ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0(ptr [[TMP27]], i32 4, <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[WIDE_MASKED_LOAD]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP28]]) -; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP30:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP29]], <vscale x 8 x float> [[TMP30]]) -; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP31]], <vscale x 8 x float> [[TMP32]]) -; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP34:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP35]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP33]], <vscale x 8 x float> [[TMP34]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64() @@ -422,7 +422,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP38]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP41]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP44]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = extractelement <vscale x 8 x i1> [[TMP45]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: @@ -515,8 +515,8 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float [[A2]], i32 0 -; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float [[A1]], i32 0 +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[A2]], i32 0 +; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[A1]], i32 0 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -672,13 +672,13 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_MASKED_VEC]]) ; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0 ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 1 -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP17]]) -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP15]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP15]], <vscale x 4 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP20]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], <vscale x 4 x float> [[TMP19]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = extractelement <vscale x 4 x i1> [[TMP21]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: @@ -791,7 +791,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] ; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 @@ -929,11 +929,11 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP15]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison) ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = fadd <vscale x 4 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP17]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP10]]) -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[TMP19]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: @@ -1032,7 +1032,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float 1.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 1.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 @@ -1041,8 +1041,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP6]] ; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 ; CHECK-UNORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison) -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = xor <vscale x 4 x i1> [[TMP9]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-UNORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = xor <vscale x 4 x i1> [[TMP9]], splat (i1 true) +; CHECK-UNORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x float> [[WIDE_MASKED_LOAD]] ; CHECK-UNORDERED-NEXT: [[TMP13]] = fadd <vscale x 4 x float> [[VEC_PHI]], [[PREDPHI]] ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1102,8 +1102,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP6]] ; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 ; CHECK-ORDERED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison) -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = xor <vscale x 4 x i1> [[TMP9]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-ORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = xor <vscale x 4 x i1> [[TMP9]], splat (i1 true) +; CHECK-ORDERED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x float> [[WIDE_MASKED_LOAD]] ; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[PREDPHI]]) ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1169,12 +1169,12 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP10]] ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP15]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[TMP14]], <vscale x 4 x float> poison) -; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x float> [[WIDE_MASKED_LOAD1]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x float> [[WIDE_MASKED_LOAD1]], <vscale x 4 x float> splat (float 3.000000e+00) +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP17]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[TMP19]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: @@ -1275,7 +1275,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float -0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float -0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 @@ -1415,10 +1415,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 @@ -1639,13 +1639,13 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] ; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] ; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = fmul <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] -; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP43]]) -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP44]], <vscale x 8 x float> [[TMP45]]) -; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP46]], <vscale x 8 x float> [[TMP47]]) -; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP50]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP48]], <vscale x 8 x float> [[TMP49]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64() @@ -1661,7 +1661,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP59]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement <vscale x 8 x i1> [[TMP60]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: @@ -1746,10 +1746,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] -; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] +; CHECK-UNORDERED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[VECTOR_BODY]] ] ; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 @@ -1970,13 +1970,13 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD13]] ; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD10]], [[WIDE_MASKED_LOAD14]] ; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_MASKED_LOAD11]], [[WIDE_MASKED_LOAD15]] -; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[TMP39]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP44:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[TMP43]]) -; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 8 x float> [[TMP40]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP44]], <vscale x 8 x float> [[TMP45]]) -; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 8 x float> [[TMP41]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP48:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP46]], <vscale x 8 x float> [[TMP47]]) -; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP49:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 8 x float> [[TMP42]], <vscale x 8 x float> splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP50]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP48]], <vscale x 8 x float> [[TMP49]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64() @@ -1992,7 +1992,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP56]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP59]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP60:%.*]] = xor <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-ORDERED-TF-NEXT: [[TMP61:%.*]] = extractelement <vscale x 8 x i1> [[TMP60]], i32 0 ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 7d2fc348..6f5b6f4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -107,7 +107,7 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]]) -; PRED-NEXT: [[TMP15:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer) +; PRED-NEXT: [[TMP15:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; PRED-NEXT: [[TMP16:%.*]] = extractelement <vscale x 16 x i1> [[TMP15]], i32 0 ; PRED-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll index b3cec2a..b5ba003 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll @@ -11,7 +11,7 @@ define void @cmpsel_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly ; CHECK: vector.body: ; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr {{.*}}, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 10, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> splat (i32 2), <vscale x 4 x i32> splat (i32 10) ; CHECK: store <vscale x 4 x i32> [[TMP2]], ptr {{.*}}, align 4 ; entry: @@ -41,8 +41,8 @@ define void @cmpsel_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly ; CHECK-NEXT: entry: ; CHECK: vector.body: ; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr {{.*}}, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+01, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> splat (float 1.000000e+01), <vscale x 4 x float> splat (float 2.000000e+00) ; CHECK: store <vscale x 4 x float> [[TMP2]], ptr {{.*}}, align 4 entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll index 43f264c..a5c3817 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll @@ -101,7 +101,7 @@ define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x double>, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <vscale x 4 x double> [[WIDE_LOAD]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 4.000000e-01, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <vscale x 4 x double> [[WIDE_LOAD]], splat (double 4.000000e-01) ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x double> poison) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], i32 8, <vscale x 4 x i1> [[TMP6]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index c38c4b0..36af7eb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -46,8 +46,8 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 16 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[TMP18]] -; CHECK-NEXT: store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP16]], align 1 -; CHECK-NEXT: store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP19]], align 1 +; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP16]], align 1 +; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -74,7 +74,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0 -; CHECK-NEXT: store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), ptr [[TMP29]], align 1 +; CHECK-NEXT: store <vscale x 8 x i8> splat (i8 1), ptr [[TMP29]], align 1 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP26]] ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -118,8 +118,8 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 16 ; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]] -; CHECK-VF8-NEXT: store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP14]], align 1 -; CHECK-VF8-NEXT: store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP17]], align 1 +; CHECK-VF8-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP14]], align 1 +; CHECK-VF8-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP17]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -212,8 +212,8 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] -; CHECK-NEXT: store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP14]], align 1 -; CHECK-NEXT: store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP17]], align 1 +; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1 +; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -275,8 +275,8 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] -; CHECK-VF8-NEXT: store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP14]], align 1 -; CHECK-VF8-NEXT: store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP17]], align 1 +; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1 +; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll index 592b118..e437da2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -21,7 +21,7 @@ define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], <vscale x 4 x i64> [[WIDE_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] @@ -91,7 +91,7 @@ define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocaptu ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], <vscale x 4 x i64> [[TMP7]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x ptr> [[TMP8]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x ptr> [[TMP8]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -157,7 +157,7 @@ define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocaptu ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]]) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> splat (i32 3), <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -226,7 +226,7 @@ define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i32> poison) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP5]], i32 4, <vscale x 4 x i1> [[TMP6]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index b418905..0761e4a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -26,7 +26,7 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8() ; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i7> ; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i7> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i7> [[TMP8]], shufflevector (<vscale x 2 x i7> insertelement (<vscale x 2 x i7> poison, i7 1, i64 0), <vscale x 2 x i7> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i7> [[TMP8]], splat (i7 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i7> zeroinitializer, [[TMP9]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: @@ -93,7 +93,7 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8() ; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i3> ; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i3> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i3> [[TMP8]], shufflevector (<vscale x 2 x i3> insertelement (<vscale x 2 x i3> poison, i3 1, i64 0), <vscale x 2 x i3> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i3> [[TMP8]], splat (i3 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i3> zeroinitializer, [[TMP9]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 2c8271c..632687b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -107,7 +107,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer @@ -121,10 +121,10 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison) -; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 2, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison) +; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[TMP7]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP8]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP8]], i32 2, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison) ; CHECK-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <vscale x 4 x i32> [[BROADCAST_SPLAT]], [[TMP9]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[OFFSET_IDX]] @@ -196,7 +196,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer @@ -214,15 +214,15 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <vscale x 4 x i32> [[TMP7]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = trunc <vscale x 4 x i32> [[TMP10]] to <vscale x 4 x i16> ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP11]], <vscale x 4 x ptr> [[TMP12]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP11]], <vscale x 4 x ptr> [[TMP12]], i32 2, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP13:%.*]] = mul nsw <vscale x 4 x i32> [[TMP8]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 4 x i32> [[TMP13]] to <vscale x 4 x i16> ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, <vscale x 4 x i64> [[TMP9]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP14]], <vscale x 4 x ptr> [[TMP15]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP14]], <vscale x 4 x ptr> [[TMP15]], i32 2, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 @@ -283,17 +283,17 @@ define i32 @test_struct_load6(ptr %S) #1 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP2]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ST6:%.*]], ptr [[S:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP5]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP5]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 2 -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP7]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP7]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 3 -; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP8]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP8]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 4 -; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 5 -; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP11]], [[WIDE_MASKED_GATHER2]] ; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER1]], [[WIDE_MASKED_GATHER3]] @@ -377,7 +377,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() -; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1023, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), [[TMP2]] +; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i32> splat (i32 1023), [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32 ; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i32 0, [[TMP3]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[DOTNEG]], i64 0 @@ -478,7 +478,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <vscale x 4 x i32> [[TMP5]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <vscale x 4 x i32> [[TMP5]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP7]] ; CHECK-NEXT: store <vscale x 4 x i32> [[TMP6]], ptr [[TMP8]], align 4 @@ -564,7 +564,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP12]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = shl nsw <vscale x 4 x i32> [[TMP13]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = shl nsw <vscale x 4 x i32> [[TMP13]], splat (i32 1) ; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[INDEX]], 9223372036854775804 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP15]] ; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP16]], align 4 @@ -636,7 +636,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1023, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), [[TMP2]] +; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i64> splat (i64 1023), [[TMP2]] ; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[DOTNEG]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer @@ -649,10 +649,10 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i64> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i64> poison) ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <vscale x 4 x i64> [[WIDE_MASKED_GATHER]], [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP4]], <vscale x 4 x ptr> [[TMP5]], i32 8, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP7]], <vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP4]], <vscale x 4 x ptr> [[TMP5]], i32 8, <vscale x 4 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP7]], <vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -895,12 +895,12 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], <vscale x 4 x i64> [[VEC_IND]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x ptr> [[TMP12]], i64 0 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP14]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP15]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP15]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -982,7 +982,7 @@ define i32 @PR27626_1(ptr %p, i64 %n) #1 { ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP12]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP14]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP14]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x ptr> [[TMP13]], i64 0 ; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <vscale x 8 x i32>, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC1]]) @@ -1074,11 +1074,11 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 -8 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], <vscale x 4 x i64> [[VEC_IND]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP15]], <vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP15]], <vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1158,14 +1158,14 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], <vscale x 4 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <vscale x 8 x i32>, ptr [[TMP14]], align 4 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC1]]) ; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 0 @@ -1252,7 +1252,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer @@ -1267,7 +1267,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[P:%.*]] = extractelement <vscale x 4 x ptr> [[TMP13]], i64 0 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x i32> [[BROADCAST_SPLAT4]]) ; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[P]], align 4 @@ -1344,8 +1344,8 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP10]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i64> [[TMP9]], splat (i64 1) +; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP10]], splat (i64 3) ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer @@ -1359,14 +1359,14 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 -1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 -3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], splat (i64 -1) +; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <vscale x 4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], <vscale x 4 x i64> [[TMP13]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], <vscale x 4 x i64> [[TMP14]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP16]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x ptr> [[TMP17]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT4]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP16]], i32 4, <vscale x 4 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x ptr> [[TMP17]], i32 4, <vscale x 4 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT4]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1457,7 +1457,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 1) ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer @@ -1466,13 +1466,13 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_MASKED_GATHER4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP15]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[A]], <vscale x 4 x i64> [[TMP18]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP20]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison), !alias.scope [[META34:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP20]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison), !alias.scope [[META34:![0-9]+]] ; CHECK-NEXT: [[TMP21:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[A]], <vscale x 4 x i64> [[TMP19]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER4]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP22]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison), !alias.scope [[META34]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER4]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP22]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison), !alias.scope [[META34]] ; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[VECTOR_RECUR]], <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]], i32 -1) ; CHECK-NEXT: [[TMP24:%.*]] = sext <vscale x 4 x i16> [[TMP23]] to <vscale x 4 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]] to <vscale x 4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 26ac9c3..f56fae3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -230,15 +230,15 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64> ; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP8]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x ptr> [[TMP9]], i32 1, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP9]], i32 1, <vscale x 16 x i1> splat (i1 true)) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], splat (i32 1) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = zext nneg <vscale x 16 x i32> [[TMP11]] to <vscale x 16 x i64> ; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP12]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 2, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x ptr> [[TMP13]], i32 1, <vscale x 16 x i1> [[TMP10]]) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP13]], i32 1, <vscale x 16 x i1> [[TMP10]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[DOTSPLAT]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -292,16 +292,16 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = zext nneg <vscale x 16 x i32> [[TMP6]] to <vscale x 16 x i64> ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP7]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x ptr> [[TMP8]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP8]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = or disjoint <vscale x 16 x i32> [[TMP6]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = or disjoint <vscale x 16 x i32> [[TMP6]], splat (i32 1) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = zext nneg <vscale x 16 x i32> [[TMP11]] to <vscale x 16 x i64> ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP12]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 2, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x ptr> [[TMP13]], i32 1, <vscale x 16 x i1> [[TMP10]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP13]], i32 1, <vscale x 16 x i1> [[TMP10]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[DOTSPLAT]] @@ -390,16 +390,16 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64> ; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP9]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x ptr> [[TMP10]], i32 1, <vscale x 16 x i1> [[TMP8]]) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP10]], i32 1, <vscale x 16 x i1> [[TMP8]]) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], splat (i32 1) ; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext nneg <vscale x 16 x i32> [[TMP12]] to <vscale x 16 x i64> ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP13]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 2, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x ptr> [[TMP14]], i32 1, <vscale x 16 x i1> [[TMP11]]) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP14]], i32 1, <vscale x 16 x i1> [[TMP11]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[DOTSPLAT]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -461,18 +461,18 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg <vscale x 16 x i32> [[TMP6]] to <vscale x 16 x i64> ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP9]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x ptr> [[TMP10]], i32 1, <vscale x 16 x i1> [[TMP8]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 1), <vscale x 16 x ptr> [[TMP10]], i32 1, <vscale x 16 x i1> [[TMP8]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP11]], <vscale x 16 x i1> zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = or disjoint <vscale x 16 x i32> [[TMP6]], shufflevector (<vscale x 16 x i32> insertelement (<vscale x 16 x i32> poison, i32 1, i64 0), <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = or disjoint <vscale x 16 x i32> [[TMP6]], splat (i32 1) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = zext nneg <vscale x 16 x i32> [[TMP13]] to <vscale x 16 x i64> ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP14]] -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 2, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x ptr> [[TMP15]], i32 1, <vscale x 16 x i1> [[TMP12]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> splat (i8 2), <vscale x 16 x ptr> [[TMP15]], i32 1, <vscale x 16 x i1> [[TMP12]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[DOTSPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll index 4a8e637..a1a89ee 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll @@ -5,7 +5,7 @@ define void @stride7_i32(ptr noalias nocapture %dst, i64 %n) #0 { ; CHECK-LABEL: @stride7_i32( ; CHECK: vector.body ; CHECK: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ] -; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], splat (i64 7) ; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, ptr %dst, <vscale x 4 x i64> %[[PTR_INDICES]] ; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %[[PTRS]] ; CHECK-NEXT: %[[VALS:.*]] = add nsw <vscale x 4 x i32> %[[GLOAD]], @@ -32,7 +32,7 @@ define void @stride7_f64(ptr noalias nocapture %dst, i64 %n) #0 { ; CHECK-LABEL: @stride7_f64( ; CHECK: vector.body ; CHECK: %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ] -; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 2 x i64> %[[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 2 x i64> %[[VEC_IND]], splat (i64 7) ; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds double, ptr %dst, <vscale x 2 x i64> %[[PTR_INDICES]] ; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %[[PTRS]], ; CHECK-NEXT: %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]], diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll index 5836d9c..ca2f969 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll @@ -16,7 +16,7 @@ define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> {{%.*}}, ptr {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 7) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NOT:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NOT:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[COND:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NOT]], i32 0 ; CHECK-NEXT: br i1 [[COND]], label %middle.block, label %vector.body ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll index d1224eb..daf2984 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll @@ -10,8 +10,8 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]]) @@ -25,14 +25,14 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC4: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] ; CHECK-VF4IC4: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] ; CHECK-VF4IC4: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP1]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP2]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP3]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP4]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP1]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP2]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP3]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP4]], splat (i1 true) ; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <vscale x 4 x i1> [[VEC_PHI1]], [[NOT1]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <vscale x 4 x i1> [[VEC_PHI2]], [[NOT2]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <vscale x 4 x i1> [[VEC_PHI3]], [[NOT3]] @@ -71,8 +71,8 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]]) @@ -105,8 +105,8 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x float> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = fcmp fast ueq <vscale x 4 x float> [[VEC_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = fcmp fast ueq <vscale x 4 x float> [[VEC_LOAD]], splat (float 3.000000e+00) +; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]]) @@ -162,9 +162,9 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32> -; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], splat (i32 35) ; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison) -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[MASKED_LOAD]], splat (i32 2) ; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]] ; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i1> [[VEC_SEL_TMP]], <vscale x 4 x i1> [[VEC_PHI]] ; CHECK-VF4IC1: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 0b3f28e..0a7948d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -71,7 +71,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll index 7514690..0f9ac7b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -25,7 +25,7 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) -; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison) @@ -33,7 +33,7 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP13]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll index df10784..f61d473 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll @@ -24,7 +24,7 @@ define void @cannot_overflow_i32_induction_var(ptr noalias %dst, ptr readonly %s ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP0]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 42, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 42) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP1]], ptr [[TMP2]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] @@ -88,7 +88,7 @@ define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP3]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 42, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP4]], ptr [[TMP5]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index 8108a49..bd8fe69 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -39,7 +39,7 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP14]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -95,7 +95,7 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]] ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0 ; CHECK-IN-LOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-IN-LOOP: middle.block: @@ -163,11 +163,11 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison) -; CHECK-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> splat (float -0.000000e+00) ; CHECK-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP13]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -217,11 +217,11 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[TMP10]] ; CHECK-IN-LOOP-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0 ; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison) -; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> splat (float -0.000000e+00) ; CHECK-IN-LOOP-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP13]]) ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 ; CHECK-IN-LOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-IN-LOOP: middle.block: @@ -288,7 +288,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 5) ; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 @@ -298,7 +298,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[TMP20]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP23:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP23:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 4 x i1> [[TMP23]], i32 0 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -356,7 +356,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[TMP10]] ; CHECK-IN-LOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 ; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) -; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 5) ; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer ; CHECK-IN-LOOP-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]] ; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 @@ -366,7 +366,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP19]] = xor i32 [[TMP18]], [[VEC_PHI]] ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP21]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-IN-LOOP-NEXT: [[TMP23:%.*]] = extractelement <vscale x 4 x i1> [[TMP22]], i32 0 ; CHECK-IN-LOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-IN-LOOP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index 677f78e..3f33ee8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -75,7 +75,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT11]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP65]], i64 [[TMP9]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT12]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP68]], i64 [[TMP9]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP71]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP72:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP72:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP76:%.*]] = extractelement <vscale x 4 x i1> [[TMP72]], i32 0 ; CHECK-NEXT: br i1 [[TMP76]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -202,7 +202,7 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP87]], i64 [[TMP9]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT15]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP90]], i64 [[TMP9]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP93]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP94:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP94:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP98:%.*]] = extractelement <vscale x 4 x i1> [[TMP94]], i32 0 ; CHECK-NEXT: br i1 [[TMP98]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index a332340..2850aca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -36,7 +36,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -162,7 +162,7 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], ptr [[TMP14]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -226,7 +226,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = mul <vscale x 4 x i64> [[TMP14]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 4, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = mul <vscale x 4 x i64> [[TMP14]], splat (i64 4) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0 @@ -242,7 +242,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x ptr> [[TMP20]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP7]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) -; CHECK-NEXT: [[TMP21:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP21:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <vscale x 4 x i1> [[TMP21]], i32 0 ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -315,7 +315,7 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -391,7 +391,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -462,7 +462,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[TMP13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[TMP13]], splat (i1 true) ; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x i32> zeroinitializer @@ -471,7 +471,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI]], ptr [[TMP17]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0 ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -560,7 +560,7 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -632,7 +632,7 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP15]], ptr [[TMP14]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[TMP16]], i32 0 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -706,12 +706,12 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) -; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD2]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD2]], <vscale x 4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP16:%.*]] = udiv <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[TMP15]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP16]], ptr [[TMP14]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index 76562e8..9567123 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -40,8 +40,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP15]], align 8 -; CHECK-NEXT: [[TMP16:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP17:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], splat (double 1.000000e+00) +; CHECK-NEXT: [[TMP17:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP5]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]] @@ -135,8 +135,8 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP16]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i64>, ptr [[TMP18]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD3]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], splat (i64 1) +; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD3]], splat (i64 1) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP8]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP22]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 6ec9eb8..c3f753a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -70,7 +70,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], splat (i64 1) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 0 @@ -81,7 +81,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP18]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP19]], align 1 -; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store <vscale x 2 x i8> [[TMP20]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] @@ -163,12 +163,12 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], splat (i64 1) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP16]], align 1 -; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store <vscale x 2 x i8> [[TMP17]], ptr [[TMP16]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP6]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index fd35fde..ade24b8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -46,16 +46,16 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt ; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC3]]) ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP14:%.*]] = add nsw <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 4 x i32> [[TMP9]], splat (i32 1) +; CHECK-NEXT: [[TMP14:%.*]] = add nsw <vscale x 4 x i32> [[TMP11]], splat (i32 1) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP16]], 4 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[DOTIDX]] ; CHECK-NEXT: store <vscale x 4 x i32> [[TMP13]], ptr [[TMP15]], align 4 ; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[TMP10]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[TMP10]], splat (i32 1) +; CHECK-NEXT: [[TMP19:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], splat (i32 1) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTIDX5:%.*]] = shl nuw nsw i64 [[TMP21]], 4 @@ -156,8 +156,8 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[DOTIDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD6]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD6]], splat (i32 1) ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTIDX7:%.*]] = shl nuw nsw i64 [[TMP11]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 [[DOTIDX7]] @@ -243,7 +243,7 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], splat (i64 2) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP10]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]] @@ -318,7 +318,7 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr % ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[VECTOR_GEP]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll index a617f5e..b8f94b9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll @@ -31,7 +31,7 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP14]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP15]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP15]], i32 1, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -58,7 +58,7 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 2 x i32>, ptr [[TMP18]], align 4 ; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 2 x i32> [[WIDE_LOAD5]] to <vscale x 2 x i64> ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 2 x i64> [[TMP19]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i32(<vscale x 2 x ptr> [[TMP20]], i32 1, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i32(<vscale x 2 x ptr> [[TMP20]], i32 1, <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll index 9ca4b20..64746fa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll @@ -84,7 +84,7 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe ; NORMAL_DEP_LIMIT-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[GEP_INDICES]], align 4, !alias.scope [[META0:![0-9]+]] ; NORMAL_DEP_LIMIT-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64> ; NORMAL_DEP_LIMIT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP11]] -; NORMAL_DEP_LIMIT-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP12]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; NORMAL_DEP_LIMIT-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP12]], i32 1, <vscale x 4 x i1> splat (i1 true)) ; NORMAL_DEP_LIMIT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IV]] ; NORMAL_DEP_LIMIT-NEXT: store <vscale x 4 x i32> [[VEC_IND]], ptr [[TMP13]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META5:![0-9]+]] ; NORMAL_DEP_LIMIT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[OTHER]], i64 [[IV]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 0c41477..511d4da 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -48,7 +48,7 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -114,7 +114,7 @@ define void @simple_histogram_inc_param(ptr noalias %buckets, ptr readonly %indi ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 [[INCVAL]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 [[INCVAL]], <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -180,7 +180,7 @@ define void @simple_histogram_sub(ptr noalias %buckets, ptr readonly %indices, i ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 -1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 -1, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -248,7 +248,7 @@ define void @conditional_histogram(ptr noalias %buckets, ptr readonly %indices, ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[CONDS]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD1]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 5100) ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> [[TMP13]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -331,7 +331,7 @@ define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N) ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP6]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i8(<vscale x 4 x ptr> [[TMP7]], i8 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i8(<vscale x 4 x ptr> [[TMP7]], i8 1, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -490,8 +490,8 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly ; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP19]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP21]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP21]], i32 1, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -561,7 +561,7 @@ define void @histogram_array_3op_gep(i64 noundef %N) #0 { ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1048576 x i32], ptr @data_array, i64 0, <vscale x 4 x i64> [[TMP14]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP11]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP11]], i32 1, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -631,7 +631,7 @@ define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr reado ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[SOMESTRUCT:%.*]], ptr [[DATA_STRUCT]], i64 1, i32 0, <vscale x 4 x i64> [[TMP6]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP7]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP7]], i32 1, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -768,7 +768,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP13]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP14]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP14]], i32 1, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]] ; CHECK-NEXT: store <vscale x 4 x i32> [[VEC_IND]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] @@ -884,7 +884,7 @@ define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[INDICES]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[BUCKETS]], <vscale x 2 x i64> [[WIDE_LOAD]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> [[TMP6]], i64 1, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> [[TMP6]], i64 1, <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index 3f67b38..0cfb2d5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -180,7 +180,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]] ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]]) -; DATA_AND_CONTROL-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; DATA_AND_CONTROL-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; DATA_AND_CONTROL-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0 ; DATA_AND_CONTROL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL: middle.block: @@ -229,7 +229,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL_NO_RT_CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll index 468cc8f..68c8e4d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll @@ -40,7 +40,7 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16> -; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i16> [[TMP10]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 2, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>) +; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i16> [[TMP10]], trunc (<vscale x 8 x i32> splat (i32 2) to <vscale x 8 x i16>) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] ; CHECK-NEXT: store <vscale x 8 x i16> [[TMP11]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] @@ -122,7 +122,7 @@ define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16> -; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i16> [[TMP10]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 2, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>) +; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i16> [[TMP10]], trunc (<vscale x 8 x i32> splat (i32 2) to <vscale x 8 x i16>) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] ; CHECK-NEXT: store <vscale x 8 x i16> [[TMP11]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll index 05bf841..b2695cb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll @@ -29,7 +29,7 @@ define void @acos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @acos_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @acos(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @acos_f64 @@ -42,7 +42,7 @@ define void @acos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @acos_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svacos_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svacos_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @acos(double [[IN:%.*]]) ; entry: @@ -74,7 +74,7 @@ define void @acos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @acos_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @acosf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @acos_f32 @@ -87,7 +87,7 @@ define void @acos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @acos_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svacos_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svacos_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @acosf(float [[IN:%.*]]) ; entry: @@ -122,7 +122,7 @@ define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @acosh_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acosh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acosh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @acosh(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @acosh_f64 @@ -135,7 +135,7 @@ define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @acosh_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svacosh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svacosh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @acosh(double [[IN:%.*]]) ; entry: @@ -167,7 +167,7 @@ define void @acosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @acosh_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acoshf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acoshf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @acoshf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @acosh_f32 @@ -180,7 +180,7 @@ define void @acosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @acosh_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svacosh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svacosh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @acoshf(float [[IN:%.*]]) ; entry: @@ -215,7 +215,7 @@ define void @asin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @asin_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @asin(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @asin_f64 @@ -228,7 +228,7 @@ define void @asin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @asin_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svasin_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svasin_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @asin(double [[IN:%.*]]) ; entry: @@ -260,7 +260,7 @@ define void @asin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @asin_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @asinf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @asin_f32 @@ -273,7 +273,7 @@ define void @asin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @asin_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svasin_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svasin_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @asinf(float [[IN:%.*]]) ; entry: @@ -308,7 +308,7 @@ define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @asinh_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asinh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asinh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @asinh(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @asinh_f64 @@ -321,7 +321,7 @@ define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @asinh_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svasinh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svasinh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @asinh(double [[IN:%.*]]) ; entry: @@ -353,7 +353,7 @@ define void @asinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @asinh_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @asinhf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @asinh_f32 @@ -366,7 +366,7 @@ define void @asinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @asinh_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svasinh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svasinh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @asinhf(float [[IN:%.*]]) ; entry: @@ -401,7 +401,7 @@ define void @atan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @atan_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @atan(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @atan_f64 @@ -414,7 +414,7 @@ define void @atan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @atan_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svatan_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svatan_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @atan(double [[IN:%.*]]) ; entry: @@ -446,7 +446,7 @@ define void @atan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @atan_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @atanf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @atan_f32 @@ -459,7 +459,7 @@ define void @atan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @atan_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svatan_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svatan_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @atanf(float [[IN:%.*]]) ; entry: @@ -494,7 +494,7 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @atan2_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @atan2(double [[IN:%.*]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @atan2_f64 @@ -507,7 +507,7 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @atan2_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @atan2(double [[IN:%.*]], double [[IN]]) ; entry: @@ -539,7 +539,7 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @atan2_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @atan2f(float [[IN:%.*]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @atan2_f32 @@ -552,7 +552,7 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @atan2_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @atan2f(float [[IN:%.*]], float [[IN]]) ; entry: @@ -587,7 +587,7 @@ define void @atanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @atanh_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atanh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atanh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @atanh(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @atanh_f64 @@ -600,7 +600,7 @@ define void @atanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @atanh_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svatanh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svatanh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @atanh(double [[IN:%.*]]) ; entry: @@ -632,7 +632,7 @@ define void @atanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @atanh_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @atanhf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @atanh_f32 @@ -645,7 +645,7 @@ define void @atanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @atanh_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svatanh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svatanh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @atanhf(float [[IN:%.*]]) ; entry: @@ -680,7 +680,7 @@ define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @cbrt_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cbrt(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cbrt(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @cbrt(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cbrt_f64 @@ -693,7 +693,7 @@ define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @cbrt_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcbrt_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcbrt_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @cbrt(double [[IN:%.*]]) ; entry: @@ -725,7 +725,7 @@ define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @cbrt_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cbrtf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cbrtf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @cbrtf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cbrt_f32 @@ -738,7 +738,7 @@ define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @cbrt_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcbrt_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcbrt_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @cbrtf(float [[IN:%.*]]) ; entry: @@ -773,7 +773,7 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @copysign_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_copysign(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_copysign(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @copysign_f64 @@ -786,7 +786,7 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @copysign_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcopysign_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcopysign_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) ; entry: @@ -818,7 +818,7 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @copysign_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_copysignf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_copysignf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @copysign_f32 @@ -831,7 +831,7 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @copysign_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcopysign_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcopysign_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) ; entry: @@ -866,7 +866,7 @@ define void @cos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @cos_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @cos(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cos_f64 @@ -879,7 +879,7 @@ define void @cos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @cos_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @cos(double [[IN:%.*]]) ; entry: @@ -911,7 +911,7 @@ define void @cos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @cos_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @cosf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cos_f32 @@ -924,7 +924,7 @@ define void @cos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @cos_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @cosf(float [[IN:%.*]]) ; entry: @@ -959,7 +959,7 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @cosh_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @cosh(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cosh_f64 @@ -972,7 +972,7 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @cosh_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcosh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcosh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @cosh(double [[IN:%.*]]) ; entry: @@ -1004,7 +1004,7 @@ define void @cosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @cosh_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @coshf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cosh_f32 @@ -1017,7 +1017,7 @@ define void @cosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @cosh_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcosh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcosh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @coshf(float [[IN:%.*]]) ; entry: @@ -1052,7 +1052,7 @@ define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @cospi_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cospi(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cospi(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cospi_f64 @@ -1065,7 +1065,7 @@ define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @cospi_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcospi_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svcospi_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]]) ; entry: @@ -1097,7 +1097,7 @@ define void @cospi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @cospi_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cospif(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cospif(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @cospi_f32 @@ -1110,7 +1110,7 @@ define void @cospi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @cospi_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcospi_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svcospi_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]]) ; entry: @@ -1145,7 +1145,7 @@ define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @erf_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_erf(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_erf(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @erf(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @erf_f64 @@ -1158,7 +1158,7 @@ define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @erf_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_sverf_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_sverf_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @erf(double [[IN:%.*]]) ; entry: @@ -1190,7 +1190,7 @@ define void @erf_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @erf_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_erff(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_erff(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @erff(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @erf_f32 @@ -1203,7 +1203,7 @@ define void @erf_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @erf_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_sverf_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_sverf_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @erff(float [[IN:%.*]]) ; entry: @@ -1238,7 +1238,7 @@ define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @erfc_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_erfc(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_erfc(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @erfc(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @erfc_f64 @@ -1251,7 +1251,7 @@ define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @erfc_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_sverfc_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_sverfc_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @erfc(double [[IN:%.*]]) ; entry: @@ -1283,7 +1283,7 @@ define void @erfc_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @erfc_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_erfcf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_erfcf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @erfcf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @erfc_f32 @@ -1296,7 +1296,7 @@ define void @erfc_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @erfc_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_sverfc_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_sverfc_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @erfcf(float [[IN:%.*]]) ; entry: @@ -1331,7 +1331,7 @@ define void @exp_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @exp_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @exp(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @exp_f64 @@ -1344,7 +1344,7 @@ define void @exp_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @exp_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @exp(double [[IN:%.*]]) ; entry: @@ -1376,7 +1376,7 @@ define void @exp_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @exp_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @expf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @exp_f32 @@ -1389,7 +1389,7 @@ define void @exp_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @exp_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @expf(float [[IN:%.*]]) ; entry: @@ -1424,7 +1424,7 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @exp10_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @exp10(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @exp10_f64 @@ -1437,7 +1437,7 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @exp10_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @exp10(double [[IN:%.*]]) ; entry: @@ -1469,7 +1469,7 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @exp10_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @exp10f(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @exp10_f32 @@ -1482,7 +1482,7 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @exp10_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @exp10f(float [[IN:%.*]]) ; entry: @@ -1517,7 +1517,7 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @exp2_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @exp2(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @exp2_f64 @@ -1530,7 +1530,7 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @exp2_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @exp2(double [[IN:%.*]]) ; entry: @@ -1562,7 +1562,7 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @exp2_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @exp2f(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @exp2_f32 @@ -1575,7 +1575,7 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @exp2_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @exp2f(float [[IN:%.*]]) ; entry: @@ -1610,7 +1610,7 @@ define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @expm1_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_expm1(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_expm1(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @expm1(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @expm1_f64 @@ -1623,7 +1623,7 @@ define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @expm1_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svexpm1_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svexpm1_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @expm1(double [[IN:%.*]]) ; entry: @@ -1655,7 +1655,7 @@ define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @expm1_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expm1f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expm1f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @expm1f(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @expm1_f32 @@ -1668,7 +1668,7 @@ define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @expm1_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svexpm1_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svexpm1_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @expm1f(float [[IN:%.*]]) ; entry: @@ -1703,7 +1703,7 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fdim_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fdim(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fdim(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fdim_f64 @@ -1716,7 +1716,7 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fdim_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfdim_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfdim_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) ; entry: @@ -1748,7 +1748,7 @@ define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fdim_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fdimf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fdimf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fdim_f32 @@ -1761,7 +1761,7 @@ define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fdim_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfdim_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfdim_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) ; entry: @@ -1796,7 +1796,7 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fma_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvvv_fma(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvvv_fma(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fma_f64 @@ -1809,7 +1809,7 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fma_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfma_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfma_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) ; entry: @@ -1841,7 +1841,7 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fma_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvvv_fmaf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvvv_fmaf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fma_f32 @@ -1854,7 +1854,7 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fma_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfma_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfma_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) ; entry: @@ -1889,7 +1889,7 @@ define void @fmax_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fmax_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmax(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmax(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fmax_f64 @@ -1902,7 +1902,7 @@ define void @fmax_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fmax_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfmax_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfmax_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) ; entry: @@ -1934,7 +1934,7 @@ define void @fmax_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fmax_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmaxf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmaxf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fmax_f32 @@ -1947,7 +1947,7 @@ define void @fmax_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fmax_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfmax_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfmax_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) ; entry: @@ -1982,7 +1982,7 @@ define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fmin_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fmin_f64 @@ -1995,7 +1995,7 @@ define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fmin_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfmin_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfmin_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) ; entry: @@ -2027,7 +2027,7 @@ define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fmin_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fminf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fminf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fmin_f32 @@ -2040,7 +2040,7 @@ define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fmin_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfmin_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfmin_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) ; entry: @@ -2075,7 +2075,7 @@ define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fmod_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fmod_f64 @@ -2088,7 +2088,7 @@ define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fmod_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfmod_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svfmod_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]]) ; entry: @@ -2120,7 +2120,7 @@ define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @fmod_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @fmod_f32 @@ -2133,7 +2133,7 @@ define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @fmod_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfmod_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svfmod_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]]) ; entry: @@ -2168,7 +2168,7 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @hypot_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_hypot(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_hypot(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @hypot_f64 @@ -2181,7 +2181,7 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @hypot_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svhypot_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svhypot_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]]) ; entry: @@ -2213,7 +2213,7 @@ define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @hypot_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_hypotf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_hypotf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @hypot_f32 @@ -2226,7 +2226,7 @@ define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @hypot_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svhypot_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svhypot_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]]) ; entry: @@ -2261,7 +2261,7 @@ define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @ilogb_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x i32> @_ZGVsMxv_ilogb(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x i32> @_ZGVsMxv_ilogb(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @ilogb_f64 @@ -2274,7 +2274,7 @@ define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @ilogb_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x i32> @armpl_svilogb_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x i32> @armpl_svilogb_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) ; entry: @@ -2306,7 +2306,7 @@ define void @ilogb_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @ilogb_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x i32> @_ZGVsMxv_ilogbf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x i32> @_ZGVsMxv_ilogbf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @ilogb_f32 @@ -2319,7 +2319,7 @@ define void @ilogb_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @ilogb_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x i32> @armpl_svilogb_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x i32> @armpl_svilogb_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) ; entry: @@ -2354,7 +2354,7 @@ define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias % ; ; SLEEF-SVE-NOPRED-LABEL: define void @ldexp_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP11:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_ldexp(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i32> [[WIDE_LOAD1:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP11:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_ldexp(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i32> [[WIDE_LOAD1:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) ; ; ARMPL-NEON-LABEL: define void @ldexp_f64 @@ -2367,7 +2367,7 @@ define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias % ; ; ARMPL-SVE-NOPRED-LABEL: define void @ldexp_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP11:%.*]] = call <vscale x 2 x double> @armpl_svldexp_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i32> [[WIDE_LOAD1:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP11:%.*]] = call <vscale x 2 x double> @armpl_svldexp_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i32> [[WIDE_LOAD1:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) ; entry: @@ -2401,7 +2401,7 @@ define void @ldexp_f32(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias % ; ; SLEEF-SVE-NOPRED-LABEL: define void @ldexp_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP11:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_ldexpf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i32> [[WIDE_LOAD1:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP11:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_ldexpf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i32> [[WIDE_LOAD1:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) ; ; ARMPL-NEON-LABEL: define void @ldexp_f32 @@ -2414,7 +2414,7 @@ define void @ldexp_f32(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias % ; ; ARMPL-SVE-NOPRED-LABEL: define void @ldexp_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP11:%.*]] = call <vscale x 4 x float> @armpl_svldexp_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i32> [[WIDE_LOAD1:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP11:%.*]] = call <vscale x 4 x float> @armpl_svldexp_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i32> [[WIDE_LOAD1:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) ; entry: @@ -2451,7 +2451,7 @@ define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @lgamma_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_lgamma(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_lgamma(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @lgamma_f64 @@ -2464,7 +2464,7 @@ define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @lgamma_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlgamma_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlgamma_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]]) ; entry: @@ -2496,7 +2496,7 @@ define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @lgamma_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_lgammaf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_lgammaf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @lgamma_f32 @@ -2509,7 +2509,7 @@ define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @lgamma_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlgamma_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlgamma_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]]) ; entry: @@ -2544,7 +2544,7 @@ define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @log_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @log(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log_f64 @@ -2557,7 +2557,7 @@ define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @log_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @log(double [[IN:%.*]]) ; entry: @@ -2589,7 +2589,7 @@ define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @log_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @logf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log_f32 @@ -2602,7 +2602,7 @@ define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @log_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @logf(float [[IN:%.*]]) ; entry: @@ -2637,7 +2637,7 @@ define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @log10_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @log10(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log10_f64 @@ -2650,7 +2650,7 @@ define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @log10_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @log10(double [[IN:%.*]]) ; entry: @@ -2682,7 +2682,7 @@ define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @log10_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @log10f(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log10_f32 @@ -2695,7 +2695,7 @@ define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @log10_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @log10f(float [[IN:%.*]]) ; entry: @@ -2730,7 +2730,7 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @log1p_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log1p(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log1p(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log1p_f64 @@ -2743,7 +2743,7 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @log1p_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlog1p_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlog1p_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]]) ; entry: @@ -2775,7 +2775,7 @@ define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @log1p_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log1pf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log1pf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log1p_f32 @@ -2788,7 +2788,7 @@ define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @log1p_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlog1p_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlog1p_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]]) ; entry: @@ -2823,7 +2823,7 @@ define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @log2_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @log2(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log2_f64 @@ -2836,7 +2836,7 @@ define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @log2_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @log2(double [[IN:%.*]]) ; entry: @@ -2868,7 +2868,7 @@ define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @log2_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @log2f(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @log2_f32 @@ -2881,7 +2881,7 @@ define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @log2_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @log2f(float [[IN:%.*]]) ; entry: @@ -3010,7 +3010,7 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @nextafter_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_nextafter(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_nextafter(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @nextafter_f64 @@ -3023,7 +3023,7 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @nextafter_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svnextafter_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svnextafter_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) ; entry: @@ -3055,7 +3055,7 @@ define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @nextafter_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_nextafterf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_nextafterf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @nextafter_f32 @@ -3068,7 +3068,7 @@ define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @nextafter_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svnextafter_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svnextafter_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) ; entry: @@ -3103,7 +3103,7 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @pow_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @pow(double [[IN:%.*]], double [[IN]]) ; ; ARMPL-NEON-LABEL: define void @pow_f64 @@ -3116,7 +3116,7 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @pow_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @pow(double [[IN:%.*]], double [[IN]]) ; entry: @@ -3148,7 +3148,7 @@ define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @pow_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @powf(float [[IN:%.*]], float [[IN]]) ; ; ARMPL-NEON-LABEL: define void @pow_f32 @@ -3161,7 +3161,7 @@ define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @pow_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @powf(float [[IN:%.*]], float [[IN]]) ; entry: @@ -3196,7 +3196,7 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @sin_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @sin(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sin_f64 @@ -3209,7 +3209,7 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @sin_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sin(double [[IN:%.*]]) ; entry: @@ -3241,7 +3241,7 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @sin_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sin_f32 @@ -3254,7 +3254,7 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @sin_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinf(float [[IN:%.*]]) ; entry: @@ -3473,7 +3473,7 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @sinh_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @sinh(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sinh_f64 @@ -3486,7 +3486,7 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @sinh_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sinh(double [[IN:%.*]]) ; entry: @@ -3518,7 +3518,7 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @sinh_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinhf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sinh_f32 @@ -3531,7 +3531,7 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @sinh_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinhf(float [[IN:%.*]]) ; entry: @@ -3566,7 +3566,7 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @sinpi_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinpi(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinpi(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sinpi_f64 @@ -3579,7 +3579,7 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @sinpi_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svsinpi_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svsinpi_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) ; entry: @@ -3611,7 +3611,7 @@ define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @sinpi_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinpif(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinpif(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sinpi_f32 @@ -3624,7 +3624,7 @@ define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @sinpi_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svsinpi_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svsinpi_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) ; entry: @@ -3659,7 +3659,7 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @sqrt_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sqrt(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sqrt(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sqrt_f64 @@ -3672,7 +3672,7 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @sqrt_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svsqrt_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svsqrt_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) ; entry: @@ -3704,7 +3704,7 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @sqrt_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sqrtf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sqrtf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @sqrt_f32 @@ -3717,7 +3717,7 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @sqrt_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svsqrt_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svsqrt_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) ; entry: @@ -3752,7 +3752,7 @@ define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @tan_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @tan(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @tan_f64 @@ -3765,7 +3765,7 @@ define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @tan_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @tan(double [[IN:%.*]]) ; entry: @@ -3797,7 +3797,7 @@ define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @tan_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @tanf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @tan_f32 @@ -3810,7 +3810,7 @@ define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @tan_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @tanf(float [[IN:%.*]]) ; entry: @@ -3845,7 +3845,7 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @tanh_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @tanh(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @tanh_f64 @@ -3858,7 +3858,7 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @tanh_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @tanh(double [[IN:%.*]]) ; entry: @@ -3890,7 +3890,7 @@ define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @tanh_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @tanhf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @tanh_f32 @@ -3903,7 +3903,7 @@ define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @tanh_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @tanhf(float [[IN:%.*]]) ; entry: @@ -3938,7 +3938,7 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @tgamma_f64 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tgamma(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tgamma(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @tgamma_f64 @@ -3951,7 +3951,7 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @tgamma_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svtgamma_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 2 x double> @armpl_svtgamma_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) ; entry: @@ -3983,7 +3983,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; SLEEF-SVE-NOPRED-LABEL: define void @tgamma_f32 ; SLEEF-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tgammaf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; SLEEF-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tgammaf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; SLEEF-SVE-NOPRED: [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]]) ; ; ARMPL-NEON-LABEL: define void @tgamma_f32 @@ -3996,7 +3996,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ; ARMPL-SVE-NOPRED-LABEL: define void @tgamma_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svtgamma_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call <vscale x 4 x float> @armpl_svtgamma_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) ; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]]) ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll index 4a3bc46..f178805 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll @@ -21,7 +21,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 { ; WIDE-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[INDEX]] ; WIDE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x double>, ptr [[TMP4]], align 8 ; WIDE-NEXT: [[TMP5:%.*]] = fptrunc <vscale x 4 x double> [[WIDE_LOAD]] to <vscale x 4 x float> -; WIDE-NEXT: [[TMP6:%.*]] = call <vscale x 4 x float> @foo_vector(<vscale x 4 x float> [[TMP5]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; WIDE-NEXT: [[TMP6:%.*]] = call <vscale x 4 x float> @foo_vector(<vscale x 4 x float> [[TMP5]], <vscale x 4 x i1> splat (i1 true)) ; WIDE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; WIDE-NEXT: store <vscale x 4 x float> [[TMP6]], ptr [[TMP7]], align 4 ; WIDE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll index 7db47cb..130ef75 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll @@ -83,8 +83,8 @@ define i32 @any_of_reduction_used_in_blend_with_mutliple_phis(ptr %src, i64 %N, ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP7:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT2]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i1> zeroinitializer ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT4]], i32 8, <vscale x 2 x i1> [[TMP8]], <vscale x 2 x ptr> poison) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <vscale x 2 x ptr> [[WIDE_MASKED_GATHER]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 9a094d8..2e0eee1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -27,7 +27,7 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 ; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 8 x i64> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0 @@ -42,7 +42,7 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -122,7 +122,7 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0 @@ -137,7 +137,7 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -217,7 +217,7 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0 @@ -232,7 +232,7 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -322,7 +322,7 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 ; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 8 x i64> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0 @@ -337,7 +337,7 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -429,7 +429,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0 @@ -444,7 +444,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -544,7 +544,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 ; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 8 x i64> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0 @@ -559,7 +559,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -664,7 +664,7 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP11]], align 2 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 8 x i16> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP12]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 99, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i16> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP12]], <vscale x 8 x i16> splat (i16 99), <vscale x 8 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: store <vscale x 8 x i16> [[PREDPHI]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -753,8 +753,8 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP11]], align 2 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 8 x i16> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 8 x i1> [[TMP12]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 99, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i16> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 8 x i1> [[TMP12]], splat (i1 true) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i16> splat (i16 99), <vscale x 8 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: store <vscale x 8 x i16> [[PREDPHI]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -862,7 +862,7 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 2 x i64> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = mul <vscale x 2 x i64> [[TMP19]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP20:%.*]] = mul <vscale x 2 x i64> [[TMP19]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP20]] ; CHECK-NEXT: [[TMP23:%.*]] = mul i64 3, [[TMP17]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP23]], i64 0 @@ -872,7 +872,7 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 2 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> [[TMP24]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)), !alias.scope [[META18:![0-9]+]], !noalias [[META21:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> [[TMP24]], i32 4, <vscale x 2 x i1> splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META21:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 6724afd..37cb557 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -37,7 +37,7 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 8 x i64> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP16]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP16]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]] ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP20]], i64 0 @@ -47,7 +47,7 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], <vscale x 8 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP21]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP21]], i32 2, <vscale x 8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -117,7 +117,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i32> [[TMP10]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 4, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i32> [[TMP10]], splat (i32 4) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = mul i32 4, [[TMP8]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP14]], i64 0 @@ -128,7 +128,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP15:%.*]] = sext <vscale x 4 x i32> [[VEC_IND]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], <vscale x 4 x i64> [[TMP15]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> [[TMP16]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> [[TMP16]], i32 1, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -341,10 +341,10 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = xor <vscale x 2 x i32> [[BROADCAST_SPLAT]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 -1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = xor <vscale x 2 x i32> [[BROADCAST_SPLAT]], splat (i32 -1) ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 9, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 9) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 9, [[TMP5]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0 @@ -354,7 +354,7 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 2 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> [[TMP10]], <vscale x 2 x ptr> [[TMP11]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> [[TMP10]], <vscale x 2 x ptr> [[TMP11]], i32 4, <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -427,7 +427,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i64> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> [[TMP10]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> [[TMP10]], splat (i64 2) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP8]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0 @@ -445,7 +445,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i8> } [[STRIDED_VEC]], 1 ; CHECK-NEXT: [[TMP17:%.*]] = zext <vscale x 4 x i8> [[TMP16]] to <vscale x 4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP17]], <vscale x 4 x ptr> [[TMP18]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP17]], <vscale x 4 x ptr> [[TMP18]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index ab62add..a7765f4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -439,7 +439,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[WIDE_LOAD]] ; CHECK-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP9]], align 8 @@ -567,7 +567,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[WIDE_LOAD]] ; CHECK-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP9]], align 8 @@ -692,8 +692,8 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 42, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP10:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 27, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 42) +; CHECK-NEXT: [[TMP10:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 27) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x i64> [[TMP10]], <vscale x 2 x i64> [[WIDE_LOAD]] ; CHECK-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -814,8 +814,8 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 42, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP10:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 27, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 42) +; CHECK-NEXT: [[TMP10:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 27) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x i64> [[TMP10]], <vscale x 2 x i64> [[WIDE_LOAD]] ; CHECK-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -936,8 +936,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -128, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 -128) +; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> splat (i8 -1), <vscale x 16 x i8> splat (i8 1) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[WIDE_LOAD]] ; CHECK-NEXT: store <vscale x 16 x i8> [[PREDPHI]], ptr [[TMP8]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index 49e9abc..b303df7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -74,7 +74,7 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP53:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP54:%.*]] = add <vscale x 8 x i64> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = mul <vscale x 8 x i64> [[TMP54]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP55:%.*]] = mul <vscale x 8 x i64> [[TMP54]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP55]] ; CHECK-NEXT: [[TMP58:%.*]] = mul i64 3, [[TMP52]] ; CHECK-NEXT: [[DOTSPLATINSERT24:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP58]], i64 0 @@ -84,7 +84,7 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i16, ptr [[A]], <vscale x 8 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP59]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP59]], i32 2, <vscale x 8 x i1> splat (i1 true)), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP52]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT25]] ; CHECK-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 8ca2bd1..8368022 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -142,10 +142,10 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = add i32 [[EVL_BASED_IV]], 0 ; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]] ; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 -; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP8]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) +; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[VP_OP_LOAD]] to <vscale x 4 x i32> -; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]]) +; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) +; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]]) ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP5]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]] ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -199,9 +199,9 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = add i32 [[EVL_BASED_IV]], 0 ; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]] ; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 -; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP8]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP5]]) +; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP8]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32> -; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP9]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP5]]) +; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP9]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP5]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]] @@ -376,10 +376,10 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) +; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) +; IF-EVL-OUTLOOP-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -432,8 +432,8 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) -; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) +; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) +; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) ; IF-EVL-INLOOP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP13]], i32 [[VEC_PHI]]) ; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index ad10ddc..bda4839 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -27,8 +27,8 @@ define void @load_store_factor2_i32(ptr %p) { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 1) +; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], splat (i32 2) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x i32> [[TMP15]]) ; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -125,8 +125,8 @@ define void @load_store_factor2_i32(ptr %p) { ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 ; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1 -; SCALABLE-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP10]], splat (i32 1) +; SCALABLE-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[TMP11]], splat (i32 2) ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x i32> [[TMP15]]) ; SCALABLE-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -204,8 +204,8 @@ define void @load_store_factor2_i64(ptr %p) { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 1) +; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], splat (i64 2) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[TMP15]]) ; CHECK-NEXT: store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -302,8 +302,8 @@ define void @load_store_factor2_i64(ptr %p) { ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> [[WIDE_VEC]]) ; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 0 ; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[STRIDED_VEC]], 1 -; SCALABLE-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) -; SCALABLE-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP12:%.*]] = add <vscale x 2 x i64> [[TMP10]], splat (i64 1) +; SCALABLE-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i64> [[TMP11]], splat (i64 2) ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[TMP15]]) ; SCALABLE-NEXT: store <vscale x 4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll index b0aefae..bef8abe 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -23,7 +23,7 @@ define void @load_store(ptr %p) { ; LMUL1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP2]] ; LMUL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8 -; LMUL1-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer) +; LMUL1-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], splat (i64 1) ; LMUL1-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8 ; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; LMUL1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -66,7 +66,7 @@ define void @load_store(ptr %p) { ; LMUL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] ; LMUL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 -; LMUL2-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; LMUL2-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 1) ; LMUL2-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8 ; LMUL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; LMUL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -109,7 +109,7 @@ define void @load_store(ptr %p) { ; LMUL4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] ; LMUL4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; LMUL4-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[TMP6]], align 8 -; LMUL4-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; LMUL4-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[WIDE_LOAD]], splat (i64 1) ; LMUL4-NEXT: store <vscale x 4 x i64> [[TMP7]], ptr [[TMP6]], align 8 ; LMUL4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; LMUL4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -152,7 +152,7 @@ define void @load_store(ptr %p) { ; LMUL8-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP4]] ; LMUL8-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; LMUL8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP6]], align 8 -; LMUL8-NEXT: [[TMP7:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer) +; LMUL8-NEXT: [[TMP7:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], splat (i64 1) ; LMUL8-NEXT: store <vscale x 8 x i64> [[TMP7]], ptr [[TMP6]], align 8 ; LMUL8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; LMUL8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index 07a5c53..44c5348 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -63,7 +63,7 @@ define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP9]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison) -; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP12]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison) @@ -132,7 +132,7 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) -; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i64 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) @@ -201,7 +201,7 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) -; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i64 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 646b18a..919e741 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -24,7 +24,7 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; VLENUNK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; VLENUNK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[TMP6]], zeroinitializer -; VLENUNK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; VLENUNK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], splat (i64 1) ; VLENUNK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]] ; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]] ; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0 @@ -36,11 +36,11 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLENUNK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLENUNK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP13:%.*]] = icmp ult <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 512, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; VLENUNK-NEXT: [[TMP13:%.*]] = icmp ult <vscale x 4 x i64> [[VEC_IND]], splat (i64 512) ; VLENUNK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP12]] ; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 ; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP15]], i32 4, <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> poison) -; VLENUNK-NEXT: [[TMP16:%.*]] = xor <vscale x 4 x i1> [[TMP13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; VLENUNK-NEXT: [[TMP16:%.*]] = xor <vscale x 4 x i1> [[TMP13]], splat (i1 true) ; VLENUNK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_LOAD]] ; VLENUNK-NEXT: [[TMP17:%.*]] = add <vscale x 4 x i32> [[PREDPHI]], [[BROADCAST_SPLAT]] ; VLENUNK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP12]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index ff23d1c..ea376fe 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -43,7 +43,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; RV32-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; RV32-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i64> [[TMP7]], zeroinitializer -; RV32-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> [[TMP8]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 16, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; RV32-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> [[TMP8]], splat (i64 16) ; RV32-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP9]] ; RV32-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP6]] ; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0 @@ -53,9 +53,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV32-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <vscale x 2 x i64> [[VEC_IND]] -; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[TMP13]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i32> poison), !alias.scope [[META0:![0-9]+]] -; RV32-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 100, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) -; RV32-NEXT: [[TMP15:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[TMP13]], i32 4, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> poison), !alias.scope [[META0:![0-9]+]] +; RV32-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100) +; RV32-NEXT: [[TMP15:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], splat (i64 1) ; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[B]], <vscale x 2 x i64> [[TMP15]] ; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> [[TMP14]], <vscale x 2 x double> poison), !alias.scope [[META3:![0-9]+]] ; RV32-NEXT: [[TMP17:%.*]] = sitofp <vscale x 2 x i32> [[WIDE_MASKED_GATHER]] to <vscale x 2 x double> @@ -123,7 +123,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; RV64-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; RV64-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i64> [[TMP7]], zeroinitializer -; RV64-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> [[TMP8]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 16, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; RV64-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> [[TMP8]], splat (i64 16) ; RV64-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP9]] ; RV64-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP6]] ; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0 @@ -133,9 +133,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV64-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV64-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <vscale x 2 x i64> [[VEC_IND]] -; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[TMP13]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i32> poison), !alias.scope [[META0:![0-9]+]] -; RV64-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 100, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) -; RV64-NEXT: [[TMP15:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[TMP13]], i32 4, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> poison), !alias.scope [[META0:![0-9]+]] +; RV64-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100) +; RV64-NEXT: [[TMP15:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], splat (i64 1) ; RV64-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[B]], <vscale x 2 x i64> [[TMP15]] ; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP16]], i32 8, <vscale x 2 x i1> [[TMP14]], <vscale x 2 x double> poison), !alias.scope [[META3:![0-9]+]] ; RV64-NEXT: [[TMP17:%.*]] = sitofp <vscale x 2 x i32> [[WIDE_MASKED_GATHER]] to <vscale x 2 x double> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index eb3f39f..9950164 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -22,7 +22,7 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 8 x i64> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 8 x i64> [[TMP7]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 8 x i64> [[TMP7]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP11]], i64 0 @@ -41,14 +41,14 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP13:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP14]], <vscale x 8 x i1> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 8 x i1> [[TMP13]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 8 x i1> [[TMP13]], splat (i1 true) ; CHECK-NEXT: [[TMP17:%.*]] = or <vscale x 8 x i1> [[TMP15]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP19:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i1> [[TMP18]], <vscale x 8 x i1> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], splat (i1 true) ; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP20]], <vscale x 8 x i1> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]] -; CHECK-NEXT: [[EXT:%.+]] = extractelement <vscale x 8 x i1> [[TMP19]], i32 0 +; CHECK-NEXT: [[EXT:%.*]] = extractelement <vscale x 8 x i1> [[TMP19]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[EXT]], i64 [[TMP12]], i64 poison ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll index cb27dc4..a6c1571 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll @@ -313,7 +313,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 ; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]] -; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true)) ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -358,7 +358,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 ; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]] -; VLEN128-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)) +; VLEN128-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true)) ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -420,7 +420,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8 ; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]] -; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison) +; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison) ; VLENUNK-NEXT: [[TMP8]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]] ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; VLENUNK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -470,7 +470,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLEN128-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8 ; VLEN128-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]] -; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP9]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison) +; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP9]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison) ; VLEN128-NEXT: [[TMP10]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]] ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; VLEN128-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index d514263..9a79b91 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -51,7 +51,7 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true) ; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -126,7 +126,7 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true) ; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -196,8 +196,8 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) +; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true) ; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -267,8 +267,8 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) +; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true) ; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -338,8 +338,8 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast ueq <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast ueq <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], splat (i1 true) ; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -438,11 +438,11 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP8:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 35) ; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0 ; SCALABLE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison) -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2) ; SCALABLE-NEXT: [[TMP13:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP12]] ; SCALABLE-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> [[VEC_PHI]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll index bb716d7..1583e07 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -20,7 +20,7 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i32> @llvm.masked.load.nxv1i32.p0(ptr [[TMP5]], i32 4, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i32> poison) -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <vscale x 1 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 1 x i32> insertelement (<vscale x 1 x i32> poison, i32 1, i64 0), <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <vscale x 1 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1) ; CHECK-NEXT: call void @llvm.masked.store.nxv1i32.p0(<vscale x 1 x i32> [[TMP6]], ptr [[TMP5]], i32 4, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP2]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -79,7 +79,7 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 437ba4b..822d5c8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -21,7 +21,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0 @@ -30,11 +30,11 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP14]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) -; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) +; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -92,7 +92,7 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 64, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], splat (i64 64) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 64, [[TMP5]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0 @@ -102,9 +102,9 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison) -; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison) +; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -181,14 +181,14 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = mul <vscale x 4 x i64> [[TMP15]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = mul <vscale x 4 x i64> [[TMP15]], splat (i64 8) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP16]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x ptr> [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP17]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[TMP19]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[TMP19]], splat (i32 1) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -256,7 +256,7 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] ; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4 -; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1) ; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -340,7 +340,7 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] ; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4 -; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1) ; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -472,7 +472,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP11]] ; NOSTRIDED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP13]], align 4 -; NOSTRIDED-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NOSTRIDED-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1) ; NOSTRIDED-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP11]] ; NOSTRIDED-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i32 0 ; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP16]], align 4 @@ -533,7 +533,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; STRIDED-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i64> [[TMP12]], zeroinitializer -; STRIDED-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP13]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; STRIDED-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP13]], splat (i64 1) ; STRIDED-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]] ; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP11]] ; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0 @@ -546,10 +546,10 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP18]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope [[META8:![0-9]+]] -; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META8:![0-9]+]] +; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1) ; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP18]] -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]] +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -619,7 +619,7 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] ; NOSTRIDED-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4 -; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NOSTRIDED-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1) ; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -769,9 +769,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP28:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT14]], [[TMP27]] ; STRIDED-NEXT: [[TMP29:%.*]] = mul <vscale x 4 x i64> [[TMP28]], [[DOTSPLAT10]] ; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], <vscale x 4 x i64> [[TMP29]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]] -; STRIDED-NEXT: [[TMP30:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[VECTOR_GEP17]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]] +; STRIDED-NEXT: [[TMP30:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1) +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[VECTOR_GEP17]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]] ; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index fadaf54..30f1f0ca 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -324,7 +324,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[TMP6]], zeroinitializer -; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]] ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] ; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0 @@ -336,7 +336,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10) ; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> poison) ; SCALABLE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], <vscale x 2 x i64> zeroinitializer ; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] @@ -432,7 +432,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> [[TMP5]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] ; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0 @@ -445,7 +445,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 1025) -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10) ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i1> zeroinitializer ; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> poison) ; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], <vscale x 2 x i64> zeroinitializer @@ -911,7 +911,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; SCALABLE-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]] -; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 1) ; SCALABLE-NEXT: [[TMP9:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP8]] ; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 @@ -996,7 +996,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> [[TMP5]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] ; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0 @@ -1127,7 +1127,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[TMP6]], zeroinitializer -; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]] ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] ; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0 @@ -1141,7 +1141,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10) ; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]]) ; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] ; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 @@ -1234,7 +1234,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> [[TMP5]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] ; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0 @@ -1249,7 +1249,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 1025) -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10) ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i1> zeroinitializer ; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]]) ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll index 501e27d..f18a098 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll @@ -41,11 +41,11 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -135,11 +135,11 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.or.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.or.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -229,11 +229,11 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.xor.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.xor.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -323,11 +323,11 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.shl.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.shl.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -417,11 +417,11 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.lshr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.lshr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -511,11 +511,11 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -605,11 +605,11 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -699,11 +699,11 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -793,11 +793,11 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -887,11 +887,11 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.sdiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.sdiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -981,11 +981,11 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.udiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.udiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -1075,11 +1075,11 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.srem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.srem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -1169,11 +1169,11 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.urem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.urem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] @@ -1266,11 +1266,11 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] @@ -1361,11 +1361,11 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] @@ -1456,11 +1456,11 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] @@ -1551,11 +1551,11 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] @@ -1699,11 +1699,11 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index e433899..66140de 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -50,11 +50,11 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV1]], 0 ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) ; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP18]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP11]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP21]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]]) +; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP22]], [[EVL_BASED_IV1]] ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP8]] @@ -109,10 +109,10 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) -; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) +; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) ; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP12]]) -; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]] ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] @@ -163,7 +163,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4 -; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 3) ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32> zeroinitializer ; NO-VP-OUTLOOP-NEXT: [[TMP17]] = add <vscale x 4 x i32> [[TMP16]], [[VEC_PHI]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -214,7 +214,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; NO-VP-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 -; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer ; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]]) ; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]] @@ -300,13 +300,13 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]] ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP11]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[TMP18]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) +; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]]) +; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = xor <vscale x 4 x i1> [[TMP18]], splat (i1 true) ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[TMP20]], <vscale x 4 x i1> zeroinitializer ; IF-EVL-OUTLOOP-NEXT: [[PREDPHI1:%.*]] = select <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP19]] -; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[PREDPHI1]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]]) +; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[PREDPHI1]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -374,8 +374,8 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]] ; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) -; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) +; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) ; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i1> zeroinitializer ; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> [[TMP20]], i32 [[TMP12]]) ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]] @@ -432,9 +432,9 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] ; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP21]], align 4 -; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 3) ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] -; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[TMP22]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[TMP18]], splat (i1 true) ; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP16]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -488,7 +488,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; NO-VP-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 -; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> zeroinitializer ; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]]) ; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]] @@ -604,7 +604,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[TMP12]], zeroinitializer -; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP13]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP13]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 ; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] @@ -666,7 +666,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() ; NO-VP-INLOOP-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i32> [[TMP6]], zeroinitializer -; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP7]], splat (i32 1) ; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 ; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] @@ -798,7 +798,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() ; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[TMP12]], zeroinitializer -; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP13]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP13]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32 ; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] @@ -815,7 +815,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP27]], align 4 ; NO-VP-OUTLOOP-NEXT: [[TMP28:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_IND]] ; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] -; NO-VP-OUTLOOP-NEXT: [[TMP23:%.*]] = xor <vscale x 4 x i1> [[TMP28]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-OUTLOOP-NEXT: [[TMP23:%.*]] = xor <vscale x 4 x i1> [[TMP28]], splat (i1 true) ; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP23]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP22]] ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] @@ -865,7 +865,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() ; NO-VP-INLOOP-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i32> [[TMP6]], zeroinitializer -; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP7]], splat (i32 1) ; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 ; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll index 8e4eb9c..46340de 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll @@ -38,8 +38,8 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] @@ -236,8 +236,8 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.or.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.or.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15]] = or i32 [[TMP14]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] @@ -353,8 +353,8 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.and.nxv4i32(i32 -1, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.and.nxv4i32(i32 -1, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15]] = and i32 [[TMP14]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] @@ -470,8 +470,8 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15]] = xor i32 [[TMP14]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] @@ -587,8 +587,8 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] @@ -707,8 +707,8 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -2147483648, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -2147483648, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] @@ -827,8 +827,8 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 -1, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 -1, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] @@ -947,8 +947,8 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] @@ -1067,8 +1067,8 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15]] = fadd reassoc float [[TMP14]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] @@ -1265,8 +1265,8 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmin.nxv4f32(float 0x47EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmin.nxv4f32(float 0x47EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP14]], [[VEC_PHI]] ; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 @@ -1387,8 +1387,8 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmax.nxv4f32(float 0xC7EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmax.nxv4f32(float 0xC7EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt float [[TMP14]], [[VEC_PHI]] ; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 @@ -1671,12 +1671,12 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = fmul reassoc <vscale x 4 x float> [[VP_OP_LOAD]], [[VP_OP_LOAD1]] -; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP18]] = fadd reassoc float [[TMP17]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] @@ -1802,10 +1802,10 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) ; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]] -; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -1855,7 +1855,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 -; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) ; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1929,10 +1929,10 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00) ; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]] -; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -1982,7 +1982,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 -; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll index 8ba47e1..4c6ccb3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll @@ -31,7 +31,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; IF-EVL-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; IF-EVL-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i64> [[TMP10]], zeroinitializer -; IF-EVL-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i64> [[TMP11]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i64> [[TMP11]], splat (i64 1) ; IF-EVL-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP12]] ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll index 80ed7157..bc55519 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll @@ -58,9 +58,9 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]] -; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP16]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]]) +; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) +; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[TMP9]] @@ -122,8 +122,8 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]] ; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 -; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP14]]), !alias.scope [[META0:![0-9]+]] -; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP14]]) +; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]]), !alias.scope [[META0:![0-9]+]] +; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]]) ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]] ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] @@ -131,7 +131,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: store i32 [[TMP22]], ptr [[ADDR]], align 4 +; IF-EVL-INLOOP-NEXT: store i32 [[TMP22]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]] ; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; IF-EVL-INLOOP: scalar.ph: ; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] @@ -146,7 +146,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-INLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4 ; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL-INLOOP: for.end: ; IF-EVL-INLOOP-NEXT: ret void ; @@ -187,7 +187,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; NO-VP-OUTLOOP: middle.block: ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP12]]) -; NO-VP-OUTLOOP-NEXT: store i32 [[TMP14]], ptr [[ADDR]], align 4 +; NO-VP-OUTLOOP-NEXT: store i32 [[TMP14]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]] ; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; NO-VP-OUTLOOP: scalar.ph: @@ -203,7 +203,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-OUTLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4 ; NO-VP-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; NO-VP-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; NO-VP-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; NO-VP-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-OUTLOOP: for.end: ; NO-VP-OUTLOOP-NEXT: ret void ; @@ -243,7 +243,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; NO-VP-INLOOP: middle.block: -; NO-VP-INLOOP-NEXT: store i32 [[TMP12]], ptr [[ADDR]], align 4 +; NO-VP-INLOOP-NEXT: store i32 [[TMP12]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]] ; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; NO-VP-INLOOP: scalar.ph: @@ -259,7 +259,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-INLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4 ; NO-VP-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; NO-VP-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; NO-VP-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; NO-VP-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; NO-VP-INLOOP: for.end: ; NO-VP-INLOOP-NEXT: ret void ; @@ -300,7 +300,9 @@ for.end: ; IF-EVL-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; IF-EVL-INLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1} ; IF-EVL-INLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"} -; IF-EVL-INLOOP: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +; IF-EVL-INLOOP: [[META6]] = !{[[META7:![0-9]+]]} +; IF-EVL-INLOOP: [[META7]] = distinct !{[[META7]], [[META2]]} +; IF-EVL-INLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} ;. ; NO-VP-OUTLOOP: [[META0]] = !{[[META1:![0-9]+]]} ; NO-VP-OUTLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]} @@ -308,7 +310,9 @@ for.end: ; NO-VP-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; NO-VP-OUTLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1} ; NO-VP-OUTLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"} -; NO-VP-OUTLOOP: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +; NO-VP-OUTLOOP: [[META6]] = !{[[META7:![0-9]+]]} +; NO-VP-OUTLOOP: [[META7]] = distinct !{[[META7]], [[META2]]} +; NO-VP-OUTLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} ;. ; NO-VP-INLOOP: [[META0]] = !{[[META1:![0-9]+]]} ; NO-VP-INLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]} @@ -316,5 +320,7 @@ for.end: ; NO-VP-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; NO-VP-INLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1} ; NO-VP-INLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"} -; NO-VP-INLOOP: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +; NO-VP-INLOOP: [[META6]] = !{[[META7:![0-9]+]]} +; NO-VP-INLOOP: [[META7]] = distinct !{[[META7]], [[META2]]} +; NO-VP-INLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll index cb4cf3a..8addd35 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll @@ -35,10 +35,10 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; IF-EVL-NEXT: [[TMP13:%.*]] = add i32 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP13]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP_LOAD]], ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP_LOAD]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP12]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[TMP10]] ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i32 [[IV_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll index a8cf002..fbc4ad0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll @@ -38,9 +38,9 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]]) -; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]]) -; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]]) +; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]]) +; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]]) +; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -114,9 +114,9 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]]) -; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]]) -; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]]) +; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]]) +; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]]) +; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -186,9 +186,9 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 -; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP5]]) -; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP5]]) -; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP8]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP5]]) +; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]]) +; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]]) +; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll index 99da505..ab58bb0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll @@ -44,13 +44,13 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp ne <vscale x 4 x i32> [[VP_OP_LOAD]], zeroinitializer ; IF-EVL-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i1> zeroinitializer ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD3]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD3]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll index 314d30f..f0fac60 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll @@ -38,8 +38,8 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14]] = call float @llvm.vp.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14]] = call float @llvm.vp.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll index 06d0eb3..f55e755 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll @@ -37,9 +37,9 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -238,9 +238,9 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -347,7 +347,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), i32 [[START:%.*]], i32 0 +; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> splat (i32 -1), i32 [[START:%.*]], i32 0 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -358,9 +358,9 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -399,7 +399,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), i32 [[START:%.*]], i32 0 +; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x i32> splat (i32 -1), i32 [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -478,9 +478,9 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -599,10 +599,10 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -726,10 +726,10 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -853,10 +853,10 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp ult <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -980,10 +980,10 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp ugt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -1095,7 +1095,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float [[START:%.*]], i32 0 +; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1106,9 +1106,9 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call reassoc <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[VP_OP]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call reassoc <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[VP_OP]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -1147,7 +1147,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float [[START:%.*]], i32 0 +; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1308,10 +1308,10 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -1435,10 +1435,10 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast ogt <vscale x 4 x float> [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -1712,7 +1712,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float [[START:%.*]], i32 0 +; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1723,12 +1723,12 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = call reassoc <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VP_OP_LOAD1]], <vscale x 4 x float> [[VEC_PHI]]) -; IF-EVL-NEXT: [[TMP17]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP17]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -1769,7 +1769,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float [[START:%.*]], i32 0 +; NO-VP-NEXT: [[TMP6:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1854,10 +1854,10 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3) ; IF-EVL-NEXT: [[TMP14:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP13]] -; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -1907,7 +1907,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 -; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) ; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1981,10 +1981,10 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00) ; IF-EVL-NEXT: [[TMP14:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP13]] -; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]]) +; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] @@ -2034,7 +2034,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 -; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index 9a001f3..cd36aee 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -38,15 +38,15 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP4]] ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]] -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] ; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP4]] ; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP4]] ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]] -; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] @@ -138,28 +138,28 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; IF-EVL-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]] ; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]] -; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1023, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], splat (i64 1023) ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[TMP6]], -1 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP7]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100) ; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP4]] ; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP4]] ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]] -; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP4]] ; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP4]] ; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]] ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]] -; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll index 322a6c1..6c2c261 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll @@ -34,11 +34,11 @@ define void @test(ptr %p) { ; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]] ; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[TMP7]], 200 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] @@ -354,11 +354,11 @@ define void @trivial_due_max_vscale(ptr %p) { ; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]] ; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP9]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[TMP7]], 8192 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP6]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] @@ -439,11 +439,11 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]] ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr align 32 [[TMP3]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr align 32 [[TMP3]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], 1024 ; IF-EVL-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP4]] ; IF-EVL-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP6]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll index c492b29..d15a66f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll @@ -42,8 +42,8 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP9]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]] -; CHECK-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP11]]) -; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_REVERSE]], ptr align 8 [[TMP20]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP11]]) +; CHECK-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]]) +; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_REVERSE]], ptr align 8 [[TMP20]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll index 83f7dc3..55a8473 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll @@ -35,14 +35,14 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD1]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) +; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD1]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP13]] ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP19]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index ed807b0..eb60778 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -28,7 +28,7 @@ define void @foo() { ; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP19]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0 @@ -38,21 +38,21 @@ define void @foo() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[OUTER_LOOP_LATCH4:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[OUTER_LOOP_LATCH4]] ] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison) ; CHECK-NEXT: br label [[INNER_LOOP1:%.*]] ; CHECK: inner_loop1: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP13:%.*]], [[INNER_LOOP1]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x float> [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP12:%.*]], [[INNER_LOOP1]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, <vscale x 4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison) ; CHECK-NEXT: [[TMP12]] = fmul <vscale x 4 x float> [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]] -; CHECK-NEXT: [[TMP13]] = add nuw nsw <vscale x 4 x i64> [[VEC_PHI]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 4 x i64> [[TMP13]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 512, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP13]] = add nuw nsw <vscale x 4 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 4 x i64> [[TMP13]], splat (i64 512) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[OUTER_LOOP_LATCH4]], label [[INNER_LOOP1]] ; CHECK: vector.latch: ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x float> [ [[TMP12]], [[INNER_LOOP1]] ] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI5]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI5]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -111,6 +111,7 @@ define void @foo() { ; NO_SCALABLE_VECS-NEXT: br i1 [[OUTER_EXITCOND]], label [[EXIT:%.*]], label [[OUTER_LOOP]], !llvm.loop [[LOOP0:![0-9]+]] ; NO_SCALABLE_VECS: exit: ; NO_SCALABLE_VECS-NEXT: ret void +; entry: br label %outer_loop diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index fb17487..f469776 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -170,7 +170,7 @@ for.end: define i64 @constant_folded_previous_value() { ; CHECK-VF4UF2-LABEL: @constant_folded_previous_value ; CHECK-VF4UF2: vector.body -; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i64> [ %vector.recur.init, %vector.ph ], [ shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), %vector.body ] +; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i64> [ %vector.recur.init, %vector.ph ], [ splat (i64 1), %vector.body ] ; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body entry: br label %scalar.body diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index 71ec13e..94fce86 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -190,7 +190,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() -; CHECK-NEXT: [[TMP7:%.*]] = shl <vscale x 4 x i32> [[TMP6]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP7:%.*]] = shl <vscale x 4 x i32> [[TMP6]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP5]] to i32 ; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP9]], i64 0 @@ -269,7 +269,7 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP8:%.*]] = uitofp <vscale x 4 x i32> [[TMP7]] to <vscale x 4 x float> -; CHECK-NEXT: [[TMP9:%.*]] = fmul <vscale x 4 x float> [[TMP8]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = fmul <vscale x 4 x float> [[TMP8]], splat (float 2.000000e+00) ; CHECK-NEXT: [[INDUCTION:%.*]] = fadd <vscale x 4 x float> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = uitofp i64 [[TMP6]] to float ; CHECK-NEXT: [[TMP13:%.*]] = fmul float [[TMP12]], 2.000000e+00 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll index b14d5a9..0908139 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll @@ -29,7 +29,7 @@ define void @test(ptr %d) { ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4096, ptr [[ARR]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 -; CHECK-NEXT: store <vscale x 2 x i32> shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 100, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP8]], align 8 +; CHECK-NEXT: store <vscale x 2 x i32> splat (i32 100), ptr [[TMP8]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4096, ptr [[ARR]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -101,7 +101,7 @@ define void @testloopvariant(ptr %d) { ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4096, ptr [[ARR]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 -; CHECK-NEXT: store <vscale x 2 x i32> shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 100, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP8]], align 8 +; CHECK-NEXT: store <vscale x 2 x i32> splat (i32 100), ptr [[TMP8]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4096, ptr [[ARR]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll index 9654280..3f9ca05 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll @@ -19,7 +19,7 @@ ; CHECKUF1: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index ; CHECKUF1: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8 -; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer) +; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, splat (double 1.000000e+00) ; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index ; CHECKUF1: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8 ; CHECKUF1: %index.next = add nuw i64 %index, %[[VSCALEX4]] @@ -52,8 +52,8 @@ ; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds i8, ptr %[[IDXB]], i64 %[[VSCALE2]] ; CHECKUF2: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8 ; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, ptr %[[IDXB_NEXT]], align 8 -; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer) -; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer) +; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, splat (double 1.000000e+00) +; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, splat (double 1.000000e+00) ; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index ; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() ; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 5 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index 7d9f0c1..223acfa 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -13,8 +13,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ] -; CHECK: [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) +; CHECK: [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], splat (i32 255) +; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], splat (i32 255) ; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr ; CHECK: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr ; CHECK-NEXT: [[TMP26:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32> diff --git a/llvm/test/Transforms/MemCpyOpt/vscale-crashes.ll b/llvm/test/Transforms/MemCpyOpt/vscale-crashes.ll index 27d9e6b..0f26a08 100644 --- a/llvm/test/Transforms/MemCpyOpt/vscale-crashes.ll +++ b/llvm/test/Transforms/MemCpyOpt/vscale-crashes.ll @@ -71,7 +71,7 @@ define void @callslotoptzn(<vscale x 4 x float> %val, ptr %out) { ; CHECK-NEXT: [[ALLOC:%.*]] = alloca <vscale x 4 x float>, align 16 ; CHECK-NEXT: [[IDX:%.*]] = tail call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[STRIDE:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], <vscale x 4 x i32> [[IDX]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VAL:%.*]], <vscale x 4 x ptr> [[STRIDE]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VAL:%.*]], <vscale x 4 x ptr> [[STRIDE]], i32 4, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[LI:%.*]] = load <vscale x 4 x float>, ptr [[ALLOC]], align 4 ; CHECK-NEXT: store <vscale x 4 x float> [[LI]], ptr [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll index 1afd895..8192364 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll @@ -121,7 +121,7 @@ define <vscale x 1 x i64> @vpload_nxv1i64_vscale(ptr %ptr, <vscale x 1 x i1> %m) define <vscale x 1 x i64> @vpload_nxv1i64_allones_mask(ptr %ptr, i32 zeroext %evl) { ; CHECK-LABEL: @vpload_nxv1i64_allones_mask( ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]], <vscale x 1 x i64> poison) @@ -173,7 +173,7 @@ define void @vpstore_nxv1i64_vscale(<vscale x 1 x i64> %val, ptr %ptr, <vscale x define void @vpstore_nxv1i64_allones_mask(<vscale x 1 x i64> %val, ptr %ptr, i32 zeroext %evl) { ; CHECK-LABEL: @vpstore_nxv1i64_allones_mask( ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1 ; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]]) diff --git a/llvm/test/Transforms/SCCP/no-fold-fcmp-dynamic-denormal-mode-issue114947.ll b/llvm/test/Transforms/SCCP/no-fold-fcmp-dynamic-denormal-mode-issue114947.ll index c91f6dc..1eb5417 100644 --- a/llvm/test/Transforms/SCCP/no-fold-fcmp-dynamic-denormal-mode-issue114947.ll +++ b/llvm/test/Transforms/SCCP/no-fold-fcmp-dynamic-denormal-mode-issue114947.ll @@ -100,7 +100,7 @@ define <2 x i1> @no_fold_fcmp_denormal_double_ieee_dynamic_vector_nonsplat() #0 define <vscale x 2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_scalable_vector_splat() #0 { ; CHECK-LABEL: define <vscale x 2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_scalable_vector_splat( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %cmp = fcmp une <vscale x 2 x double> splat (double 2.0), zeroinitializer ret <vscale x 2 x i1> %cmp @@ -109,7 +109,7 @@ define <vscale x 2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_scalable_vec define <vscale x 2 x i1> @no_fold_fcmp_denormal_double_ieee_dynamic_scalaable_vector_splat() #0 { ; CHECK-LABEL: define <vscale x 2 x i1> @no_fold_fcmp_denormal_double_ieee_dynamic_scalaable_vector_splat( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true) ; %cmp = fcmp une <vscale x 2 x double> splat (double 0x8000000000000), zeroinitializer ret <vscale x 2 x i1> %cmp diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll index 058aaea..c123458 100644 --- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll @@ -51,7 +51,7 @@ define <vscale x 1 x i64> @add_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -60,7 +60,7 @@ define <vscale x 1 x i64> @add_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -77,7 +77,7 @@ define <vscale x 1 x i64> @add_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <v ; ALL-LABEL: @add_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -103,7 +103,7 @@ define <vscale x 1 x i64> @sub_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -112,7 +112,7 @@ define <vscale x 1 x i64> @sub_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -129,7 +129,7 @@ define <vscale x 1 x i64> @sub_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <v ; ALL-LABEL: @sub_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -155,7 +155,7 @@ define <vscale x 1 x i64> @mul_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -164,7 +164,7 @@ define <vscale x 1 x i64> @mul_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -181,7 +181,7 @@ define <vscale x 1 x i64> @mul_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <v ; ALL-LABEL: @mul_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -207,7 +207,7 @@ define <vscale x 1 x i64> @sdiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -216,7 +216,7 @@ define <vscale x 1 x i64> @sdiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -233,7 +233,7 @@ define <vscale x 1 x i64> @sdiv_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < ; ALL-LABEL: @sdiv_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -280,7 +280,7 @@ define <vscale x 1 x i64> @udiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -289,7 +289,7 @@ define <vscale x 1 x i64> @udiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -306,7 +306,7 @@ define <vscale x 1 x i64> @udiv_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < ; ALL-LABEL: @udiv_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -353,7 +353,7 @@ define <vscale x 1 x i64> @srem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -362,7 +362,7 @@ define <vscale x 1 x i64> @srem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -379,7 +379,7 @@ define <vscale x 1 x i64> @srem_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < ; ALL-LABEL: @srem_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -426,7 +426,7 @@ define <vscale x 1 x i64> @urem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -435,7 +435,7 @@ define <vscale x 1 x i64> @urem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -452,7 +452,7 @@ define <vscale x 1 x i64> @urem_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < ; ALL-LABEL: @urem_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -675,7 +675,7 @@ define <vscale x 1 x i64> @ashr_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -684,7 +684,7 @@ define <vscale x 1 x i64> @ashr_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -701,7 +701,7 @@ define <vscale x 1 x i64> @ashr_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < ; ALL-LABEL: @ashr_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -727,7 +727,7 @@ define <vscale x 1 x i64> @lshr_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -736,7 +736,7 @@ define <vscale x 1 x i64> @lshr_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -753,7 +753,7 @@ define <vscale x 1 x i64> @lshr_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < ; ALL-LABEL: @lshr_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -779,7 +779,7 @@ define <vscale x 1 x i64> @shl_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -788,7 +788,7 @@ define <vscale x 1 x i64> @shl_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -805,7 +805,7 @@ define <vscale x 1 x i64> @shl_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <v ; ALL-LABEL: @shl_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -831,7 +831,7 @@ define <vscale x 1 x i64> @or_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.or.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.or.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -840,7 +840,7 @@ define <vscale x 1 x i64> @or_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.or.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.or.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -857,7 +857,7 @@ define <vscale x 1 x i64> @or_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vs ; ALL-LABEL: @or_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.or.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.or.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -883,7 +883,7 @@ define <vscale x 1 x i64> @and_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -892,7 +892,7 @@ define <vscale x 1 x i64> @and_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -909,7 +909,7 @@ define <vscale x 1 x i64> @and_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <v ; ALL-LABEL: @and_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -935,7 +935,7 @@ define <vscale x 1 x i64> @xor_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.xor.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.xor.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -944,7 +944,7 @@ define <vscale x 1 x i64> @xor_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.xor.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.xor.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -961,7 +961,7 @@ define <vscale x 1 x i64> @xor_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <v ; ALL-LABEL: @xor_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.xor.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.xor.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -978,7 +978,7 @@ define <vscale x 1 x i64> @smin_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -995,7 +995,7 @@ define <vscale x 1 x i64> @smin_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < ; ALL-LABEL: @smin_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -1012,7 +1012,7 @@ define <vscale x 1 x i64> @smax_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -1029,7 +1029,7 @@ define <vscale x 1 x i64> @smax_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < ; ALL-LABEL: @smax_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.smax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -1046,7 +1046,7 @@ define <vscale x 1 x i64> @umin_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umin.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -1065,7 +1065,7 @@ define <vscale x 1 x i64> @umax_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 % ; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -1082,7 +1082,7 @@ define <vscale x 1 x i64> @umax_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, < ; ALL-LABEL: @umax_nxv1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.umax.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> splat (i64 42), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x i64> [[TMP4]] ; @@ -1108,7 +1108,7 @@ define <vscale x 1 x float> @fadd_nxv1f32_allonesmask(<vscale x 1 x float> %x, f ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1125,7 +1125,7 @@ define <vscale x 1 x float> @fadd_nxv1f32_anymask(<vscale x 1 x float> %x, float ; ALL-LABEL: @fadd_nxv1f32_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1151,7 +1151,7 @@ define <vscale x 1 x float> @fsub_nxv1f32_allonesmask(<vscale x 1 x float> %x, f ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1168,7 +1168,7 @@ define <vscale x 1 x float> @fsub_nxv1f32_anymask(<vscale x 1 x float> %x, float ; ALL-LABEL: @fsub_nxv1f32_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1194,7 +1194,7 @@ define <vscale x 1 x float> @fdiv_nxv1f32_allonesmask(<vscale x 1 x float> %x, f ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1211,7 +1211,7 @@ define <vscale x 1 x float> @fdiv_nxv1f32_anymask(<vscale x 1 x float> %x, float ; ALL-LABEL: @fdiv_nxv1f32_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1237,7 +1237,7 @@ define <vscale x 1 x float> @frem_nxv1f32_allonesmask(<vscale x 1 x float> %x, f ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1254,7 +1254,7 @@ define <vscale x 1 x float> @frem_nxv1f32_allonesmask(<vscale x 1 x float> %x, f ; ALL-LABEL: @frem_nxv1f32_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1280,7 +1280,7 @@ define <vscale x 1 x float> @fdiv_nxv1f32_allonesmask_knownvl(<vscale x 1 x floa ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 4) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK]], i32 4) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 4) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1297,7 +1297,7 @@ define <vscale x 1 x float> @fdiv_nxv1f32_anymask_knownvl(<vscale x 1 x float> % ; ALL-LABEL: @fdiv_nxv1f32_anymask_knownvl( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 4) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK:%.*]], i32 4) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 4) ; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1323,7 +1323,7 @@ define <vscale x 1 x float> @frem_nxv1f32_allonesmask_knownvl(<vscale x 1 x floa ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 4) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK]], i32 4) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 4) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1340,7 +1340,7 @@ define <vscale x 1 x float> @frem_nxv1f32_allonesmask_knownvl(<vscale x 1 x floa ; ALL-LABEL: @frem_nxv1f32_anymask_knownvl( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 4) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK:%.*]], i32 4) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 4) ; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1366,7 +1366,7 @@ define <vscale x 1 x float> @copysign_nxv1f32_allonesmask(<vscale x 1 x float> % ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.copysign.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.copysign.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1383,7 +1383,7 @@ define <vscale x 1 x float> @copysign_nxv1f32_anymask(<vscale x 1 x float> %x, f ; ALL-LABEL: @copysign_nxv1f32_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.copysign.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.copysign.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1409,7 +1409,7 @@ define <vscale x 1 x float> @minnum_nxv1f32_allonesmask(<vscale x 1 x float> %x, ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.minnum.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.minnum.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1426,7 +1426,7 @@ define <vscale x 1 x float> @minnum_nxv1f32_anymask(<vscale x 1 x float> %x, flo ; ALL-LABEL: @minnum_nxv1f32_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.minnum.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.minnum.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1452,7 +1452,7 @@ define <vscale x 1 x float> @maxnum_nxv1f32_allonesmask(<vscale x 1 x float> %x, ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.maxnum.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.maxnum.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1469,7 +1469,7 @@ define <vscale x 1 x float> @maxnum_nxv1f32_anymask(<vscale x 1 x float> %x, flo ; ALL-LABEL: @maxnum_nxv1f32_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.maxnum.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.maxnum.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> splat (float 4.200000e+01), <vscale x 1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]] ; @@ -1495,7 +1495,7 @@ define <vscale x 8 x i8> @add_nxv8i8_allonesmask(<vscale x 8 x i8> %x, i8 %y, i3 ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 8 x i1> [[SPLAT]], <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 8 x i8> [[TMP1]], <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 42, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i8> splat (i8 42), <vscale x 8 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> [[X:%.*]], <vscale x 8 x i8> [[TMP3]], <vscale x 8 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 8 x i8> [[TMP4]] ; @@ -1512,7 +1512,7 @@ define <vscale x 8 x i8> @add_nxv8i8_anymask(<vscale x 8 x i8> %x, i8 %y, <vscal ; ALL-LABEL: @add_nxv8i8_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 8 x i8> [[TMP1]], <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 42, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i8> splat (i8 42), <vscale x 8 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> [[X:%.*]], <vscale x 8 x i8> [[TMP3]], <vscale x 8 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 8 x i8> [[TMP4]] ; @@ -1538,7 +1538,7 @@ define <vscale x 8 x half> @fadd_nxv1f16_allonesmask(<vscale x 8 x half> %x, hal ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 8 x i1> [[SPLAT]], <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 8 x half> poison, half [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH5140, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> splat (half 0xH5140), <vscale x 8 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> [[X:%.*]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <vscale x 8 x half> [[TMP4]] ; @@ -1555,7 +1555,7 @@ define <vscale x 8 x half> @fadd_nxv8f16_anymask(<vscale x 8 x half> %x, half %y ; ALL-LABEL: @fadd_nxv8f16_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 8 x half> poison, half [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH5140, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> [[TMP2]], <vscale x 8 x half> splat (half 0xH5140), <vscale x 8 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> [[X:%.*]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <vscale x 8 x half> [[TMP4]] ; diff --git a/llvm/test/Transforms/VectorCombine/pr88796.ll b/llvm/test/Transforms/VectorCombine/pr88796.ll index 4f26f5d..6f98892 100644 --- a/llvm/test/Transforms/VectorCombine/pr88796.ll +++ b/llvm/test/Transforms/VectorCombine/pr88796.ll @@ -4,7 +4,7 @@ define i32 @test() { ; CHECK-LABEL: define i32 @test() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 268435456, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)) +; CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> trunc (<vscale x 8 x i32> splat (i32 268435456) to <vscale x 8 x i16>)) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index ca40a8d..f75c2e4 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -998,7 +998,7 @@ llvm.func @vector_splat_nonzero() -> vector<4xf32> { // CHECK-LABEL: @vector_splat_nonzero_scalable llvm.func @vector_splat_nonzero_scalable() -> vector<[4]xf32> { - // CHECK: ret <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) + // CHECK: ret <vscale x 4 x float> splat (float 1.000000e+00) %0 = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32> llvm.return %0 : vector<[4]xf32> } |