author     Florian Hahn <flo@fhahn.com>   2025-06-15 21:59:30 +0100
committer  Florian Hahn <flo@fhahn.com>   2025-06-15 22:00:47 +0100
commit     790df93298b3ad6c57dafb55fc6d18bddff16c4a (patch)
tree       d8a0a8a72895a236e0f1c87ba7bac972ae49a34b
parent     087a6ac420ad99c523b9dd517351e0c6d1f1a980 (diff)
[VPlan] Mark VPFirstOrderRecurrencePHI as not reading/writing memory.
First-order recurrence phis don't have side-effects and don't read or
write memory. Mark them as such.
15 files changed, 99 insertions, 213 deletions
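The change itself is small: once VPFirstOrderRecurrencePHISC reports that it neither reads nor writes memory and has no side effects, generic VPlan cleanup can treat unused first-order recurrence phis as dead, which is why the VECTOR_RECUR phis and their VECTOR_RECUR_INIT insertelement setup disappear from most of the test output below. A minimal sketch of such a query-driven dead-recipe sweep follows; the function name and shape are assumptions for illustration, and only the member functions it calls (definedValues, getNumUsers, mayHaveSideEffects, mayWriteToMemory, eraseFromParent) are existing VPlan APIs.

// Sketch only (assumed helper name and shape): a simplified dead-recipe sweep
// over one VPlan basic block. The real LLVM transform differs in detail, but
// the gating condition is the point: a recipe may only be erased when all of
// its defined values are unused and it reports no side effects and no writes.
#include "VPlan.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

static void removeDeadRecipesSketch(VPBasicBlock &VPBB) {
  // Visit recipes bottom-up so that erasing a user can expose its operands
  // as dead in the same sweep.
  for (VPRecipeBase &R : make_early_inc_range(reverse(VPBB))) {
    bool AllDefsDead = all_of(R.definedValues(),
                              [](VPValue *V) { return V->getNumUsers() == 0; });
    // Before this patch an unused first-order recurrence phi fell into the
    // conservative defaults of these queries and could never be erased; now
    // VPFirstOrderRecurrencePHISC answers false to both, so it qualifies.
    if (AllDefsDead && !R.mayHaveSideEffects() && !R.mayWriteToMemory())
      R.eraseFromParent();
  }
}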
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 10906d9..cca3d32 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1071,7 +1071,14 @@ void VPlan::execute(VPTransformState *State) {
 InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
   // For now only return the cost of the vector loop region, ignoring any other
   // blocks, like the preheader or middle blocks.
-  return getVectorLoopRegion()->cost(VF, Ctx);
+  InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx);
+
+  // If any instructions in the middle block are invalid return invalid.
+  // TODO: Remove once no VPlans with VF == vscale x 1 and first-order recurrences are created.
+  if (!getMiddleBlock()->cost(VF, Ctx).isValid())
+    return InstructionCost::getInvalid();
+
+  return Cost;
 }
 
 VPRegionBlock *VPlan::getVectorLoopRegion() {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3bdfa67..048286d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -66,6 +66,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
   case VPWidenIntrinsicSC:
     return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
   case VPBranchOnMaskSC:
+  case VPFirstOrderRecurrencePHISC:
   case VPScalarIVStepsSC:
   case VPPredInstPHISC:
     return false;
@@ -113,6 +114,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
   case VPWidenIntrinsicSC:
     return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
   case VPBranchOnMaskSC:
+  case VPFirstOrderRecurrencePHISC:
   case VPPredInstPHISC:
   case VPScalarIVStepsSC:
   case VPWidenStoreEVLSC:
@@ -146,6 +148,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
 bool VPRecipeBase::mayHaveSideEffects() const {
   switch (getVPDefID()) {
   case VPDerivedIVSC:
+  case VPFirstOrderRecurrencePHISC:
   case VPPredInstPHISC:
   case VPVectorEndPointerSC:
     return false;
@@ -837,6 +840,10 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
                                          I32Ty, {Arg0Ty, I32Ty, I1Ty});
     return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
   }
+  case VPInstruction::ExtractPenultimateElement:
+    if (VF == ElementCount::getScalable(1))
+      return InstructionCost::getInvalid();
+    LLVM_FALLTHROUGH;
   default:
     // TODO: Compute cost other VPInstructions once the legacy cost model has
     // been retired.
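The VPlan.cpp hunk pairs with the new ExtractPenultimateElement case: at VF == vscale x 1 there is no penultimate element to extract, so the middle block's cost becomes invalid and VPlan::cost now reports the whole plan as invalid instead of returning only the loop-region cost. A small sketch of how an invalid plan cost would typically be consumed when choosing a vectorization factor is shown below; the selection loop and names are illustrative assumptions, not the actual LoopVectorizationPlanner code.

// Sketch only: skipping plans whose cost is invalid. Everything except
// VPlan::cost(VF, Ctx) and InstructionCost's isValid()/getInvalid() is an
// illustrative assumption, not code from this patch.
#include "VPlan.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/InstructionCost.h"

using namespace llvm;

static InstructionCost cheapestValidCost(VPlan &Plan,
                                         ArrayRef<ElementCount> VFs,
                                         VPCostContext &Ctx) {
  InstructionCost Best = InstructionCost::getInvalid();
  for (ElementCount VF : VFs) {
    InstructionCost C = Plan.cost(VF, Ctx);
    // A plan whose middle block contains ExtractPenultimateElement at
    // VF == vscale x 1 now yields an invalid cost here and is skipped
    // rather than being chosen on a loop-region-only cost.
    if (!C.isValid())
      continue;
    if (!Best.isValid() || C < Best)
      Best = C;
  }
  return Best;
}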
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 6ed9c85..4af4929 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -105,12 +105,11 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ <i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1 -; CHECK-NEXT: [[TMP9]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -162,12 +161,11 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -306,7 +304,6 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 @@ -316,7 +313,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) { ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i32> [[TMP7]] to <4 x i64> ; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64> -; CHECK-NEXT: [[TMP10]] = shl <4 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = shl <4 x i64> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = 
add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index e97bb85..31be886 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -750,11 +750,10 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <16 x i32> [[TMP3]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[TMP4]] to <16 x i8> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] @@ -832,11 +831,10 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 -; CHECK-NEXT: [[TMP5]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i32> [[TMP5]], splat (i32 2) ; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 52dcba6..b091452 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -890,14 +890,9 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 -; 
CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-INTERLEAVED: vector.body: ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() @@ -917,7 +912,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD3]] to <vscale x 8 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = mul <vscale x 8 x i32> [[TMP22]], [[TMP15]] -; CHECK-INTERLEAVED-NEXT: [[TMP25]] = mul <vscale x 8 x i32> [[TMP23]], [[TMP16]] +; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = mul <vscale x 8 x i32> [[TMP23]], [[TMP16]] ; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> [[TMP24]], <vscale x 8 x i32> [[TMP25]], i32 -1) ; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = add <vscale x 8 x i32> [[TMP25]], [[TMP26]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -1020,14 +1015,9 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 -; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-INTERLEAVE1: vector.body: ; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVE1-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1 @@ -1037,7 +1027,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = mul <vscale x 8 x i32> [[TMP15]], [[TMP12]] -; CHECK-INTERLEAVE1-NEXT: [[TMP17]] = add <vscale x 8 x i32> [[TMP16]], [[TMP15]] +; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = add <vscale x 8 x i32> [[TMP16]], [[TMP15]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1067,14 +1057,9 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16 -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-INTERLEAVED: vector.body: ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 8 @@ -1088,7 +1073,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP20]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul <vscale x 8 x i32> [[TMP22]], [[TMP15]] -; CHECK-INTERLEAVED-NEXT: [[TMP21]] = add <vscale x 8 x i32> [[TMP30]], [[TMP22]] +; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = add <vscale x 8 x i32> [[TMP30]], [[TMP22]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1118,14 +1103,9 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 -; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; CHECK-MAXBW-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 0, i32 [[TMP8]] ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-MAXBW: vector.body: ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-MAXBW-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP13]], align 1 @@ -1135,7 +1115,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP18]], align 1 ; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32> ; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = mul <vscale x 8 x i32> [[TMP19]], [[TMP14]] -; CHECK-MAXBW-NEXT: [[TMP21]] = add <vscale x 8 x i32> [[TMP20]], [[TMP19]] +; 
CHECK-MAXBW-NEXT: [[TMP21:%.*]] = add <vscale x 8 x i32> [[TMP20]], [[TMP19]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index ea8df16..d41caca 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -31,15 +31,10 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 -; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -121,15 +116,10 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 0, i32 [[TMP16]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] @@ -211,15 +201,10 @@ define void 
@multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 0, i32 [[TMP16]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] @@ -311,15 +296,10 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 -; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -413,15 +393,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 1, i32 [[TMP16]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: 
[[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] @@ -523,15 +498,10 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8 -; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -621,14 +591,9 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 1, i32 [[TMP8]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP11]], align 2 @@ -705,14 +670,9 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 
8 -; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 1, i32 [[TMP8]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP11]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 83e9d61..3361068 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -217,14 +217,12 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: [[REC_START:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <2 x i64> poison, i64 [[REC_START]], i32 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP1]] = sub nsw <2 x i64> zeroinitializer, [[STEP_ADD]] +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i64> zeroinitializer, [[STEP_ADD]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 ; CHECK-NEXT: store i64 [[TMP2]], ptr [[GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -299,13 +297,12 @@ define void @for_iv_trunc_optimized(ptr %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 1>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP0]], splat (i32 3) -; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], splat (i32 3) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], splat (i32 3) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3 ; 
CHECK-NEXT: store i32 [[TMP6]], ptr [[DST:%.*]], align 4 @@ -364,11 +361,9 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 8, i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP4]] -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x ptr> poison, ptr [[A:%.*]], i32 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2 @@ -377,7 +372,7 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]] @@ -392,7 +387,7 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP21]], i32 0 ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP22]], i32 1 ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x ptr> [[TMP26]], ptr [[TMP23]], i32 2 -; CHECK-NEXT: [[TMP28]] = insertelement <4 x ptr> [[TMP27]], ptr [[TMP24]], i32 3 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x ptr> [[TMP27]], ptr [[TMP24]], i32 3 ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x ptr> [[TMP20]], <4 x ptr> [[TMP28]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP21]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9> diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 19424e4..7aeb32a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -407,11 +407,10 @@ define i16 @iv_and_step_trunc() { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i16> [ <i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16> -; CHECK-NEXT: [[TMP2]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]] ; 
CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll b/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll index bcfa212..c02ec91 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll @@ -15,9 +15,8 @@ define void @no_use() { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 @@ -60,9 +59,8 @@ define void @dead_use() { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index 0cd7465..3688426 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -56,7 +56,6 @@ define void @test(ptr %p) { ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 1>, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ] ; VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 1, i16 2, i16 3, i16 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0 ; VEC-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1 @@ -75,7 +74,7 @@ define void @test(ptr %p) { ; VEC-NEXT: store i64 0, ptr [[TMP25]], align 8 ; VEC-NEXT: store i64 0, ptr [[TMP26]], align 8 ; VEC-NEXT: [[TMP27:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) -; VEC-NEXT: [[TMP28]] = zext <4 x i16> [[TMP27]] to <4 x i64> +; VEC-NEXT: [[TMP28:%.*]] = zext <4 x i16> [[TMP27]] to <4 x i64> ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VEC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; VEC-NEXT: 
[[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll index 6b7736f..53113b2 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll @@ -11,13 +11,12 @@ define float @for_load_interleave_only(ptr %src) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[NEXT_GEP]], align 4 -; CHECK-NEXT: [[TMP3]] = load float, ptr [[NEXT_GEP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[NEXT_GEP2]], align 4 ; CHECK-NEXT: store float 0.000000e+00, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: store float 0.000000e+00, ptr [[NEXT_GEP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -27,12 +26,12 @@ define float @for_load_interleave_only(ptr %src) { ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1001, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[SRC]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[SRC]], %[[ENTRY]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[FOR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 16 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index d28db1c..13dc535 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -884,23 +884,21 @@ define i32 @PR27246(ptr %dst) { ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[I_016]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> 
zeroinitializer ; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3> -; UNROLL-NO-IC-NEXT: br label [[FOR_COND1:%.*]] +; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: -; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND1]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[FOR_COND1]] ] -; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND1]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 ; UNROLL-NO-IC-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 @@ -943,10 +941,9 @@ define i32 @PR27246(ptr %dst) { ; UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND1]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[FOR_COND1]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[I_016]], [[INDEX]] ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-VF-NEXT: [[TMP3]] = add i32 [[OFFSET_IDX]], -1 +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NO-VF-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -1071,32 +1068,26 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP7]], 2 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP16]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP17]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP24]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP18]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP13]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP14]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1105,7 +1096,7 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD:%.*]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] @@ -1133,7 +1124,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = 
phi i32 [ [[PRE_LOAD:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2 @@ -1141,7 +1131,7 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP5]] ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP10]] = load i32, ptr [[TMP8]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1150,7 +1140,7 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD:%.*]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: ; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] @@ -1175,32 +1165,26 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; SINK-AFTER-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 -; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3 ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2 -; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4 -; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6 -; SINK-AFTER-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 2 +; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 4 +; SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 6 ; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2 ; SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2 ; SINK-AFTER-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2 -; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]] -; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] +; SINK-AFTER-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP7]], 2 +; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 
[[TMP8]] ; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]] ; SINK-AFTER-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] -; SINK-AFTER-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 +; SINK-AFTER-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; SINK-AFTER-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4 -; SINK-AFTER-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4 -; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4 -; SINK-AFTER-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0 -; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP16]], i32 1 -; SINK-AFTER-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP17]], i32 2 -; SINK-AFTER-NEXT: [[TMP24]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP18]], i32 3 +; SINK-AFTER-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP13]], align 4 +; SINK-AFTER-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP14]], align 4 +; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP15]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1209,7 +1193,7 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD:%.*]], [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: ; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] @@ -1248,7 +1232,6 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1276,7 +1259,6 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 1, [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NO-VF-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1304,7 +1286,6 @@ define i64 @constant_folded_previous_value() { ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1357,9 +1338,8 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; UNROLL-NO-IC-NEXT: [[TMP0]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]] +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 @@ -1391,29 +1371,28 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[VECTOR_RECUR]], 1 -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[VECTOR_RECUR]], [[X:%.*]] -; UNROLL-NO-VF-NEXT: [[TMP4]] = add i32 [[TMP0]], [[X]] +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i32 [[VECTOR_RECUR]], [[X:%.*]] +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[X]] ; UNROLL-NO-VF-NEXT: [[TMP3]] = add nuw i32 [[VECTOR_RECUR]], 2 -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP3]], 96 -; UNROLL-NO-VF-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 96 +; UNROLL-NO-VF-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT1:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: -; UNROLL-NO-VF-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[INC]] = add i32 [[INC_PHI]], 1 -; UNROLL-NO-VF-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to 
i64 -; UNROLL-NO-VF-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] -; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 +; UNROLL-NO-VF-NEXT: [[VAL_PHI1:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT1]], [[SCALAR_PH]] ], [ [[ADDX1:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[ADDX]] = add i32 [[VAL_PHI]], 1 +; UNROLL-NO-VF-NEXT: [[BC:%.*]] = zext i32 [[VAL_PHI]] to i64 +; UNROLL-NO-VF-NEXT: [[ADDX1]] = add i32 [[VAL_PHI]], [[X]] +; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL_PHI]], 95 ; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; UNROLL-NO-VF: for.end: -; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI1]], [[FOR_BODY]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[VAL_PHI_LCSSA]] ; ; SINK-AFTER-LABEL: @extract_second_last_iteration( @@ -1426,8 +1405,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 @@ -2507,13 +2485,12 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) -; UNROLL-NO-IC-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], splat (i16 5) ; UNROLL-NO-IC-NEXT: [[TMP5]] = add <4 x i16> [[TMP2]], splat (i16 5) ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> @@ -2561,13 +2538,12 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ 
[[TMP6:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR1:%.*]] = phi i32 [ -27, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i16 [[TMP1]], 1 -; UNROLL-NO-VF-NEXT: [[TMP4]] = zext i16 [[TMP3]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add i16 [[TMP2]], 5 ; UNROLL-NO-VF-NEXT: [[TMP6]] = add i16 [[TMP3]], 5 ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sub i16 [[VECTOR_RECUR]], 10 @@ -2610,11 +2586,10 @@ define void @sink_dead_inst(ptr %a) { ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] ; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) -; SINK-AFTER-NEXT: [[TMP2]] = zext <4 x i16> [[TMP1]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], splat (i16 5) ; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], splat (i16 10) @@ -3500,12 +3475,11 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 4 @@ -3548,15 +3522,14 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP10:%.*]], 
[[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[VECTOR_RECUR]] to i16 ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i16 [[TMP1]], 1 +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[TMP1]], 1 +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = or i16 [[TMP5]], [[TMP5]] ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = or i16 [[TMP2]], [[TMP2]] -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = or i16 [[TMP3]], [[TMP3]] -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP10]] = zext i16 [[TMP5]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = zext i16 [[TMP3]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = zext i16 [[TMP4]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]] ; UNROLL-NO-VF-NEXT: store i32 0, ptr [[TMP8]], align 4 @@ -3596,11 +3569,10 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) ; SINK-AFTER-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] -; SINK-AFTER-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] ; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; SINK-AFTER-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 @@ -3668,10 +3640,9 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) -; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5) +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[TMP0]], splat (i16 5) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 @@ -3701,12 +3672,11 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; 
UNROLL-NO-VF-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] ; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1 -; UNROLL-NO-VF-NEXT: [[TMP2]] = add i16 [[TMP1]], 5 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[TMP1]], 5 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028 ; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] @@ -3735,9 +3705,8 @@ define void @unused_recurrence(ptr %a) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) -; SINK-AFTER-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5) +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[TMP0]], splat (i16 5) ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028 diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index f54d3ba..db25e7b 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -243,10 +243,9 @@ define void @first_order_recurrence_using_induction(i32 %n, ptr %dst) { ; CHECK-LABEL: @first_order_recurrence_using_induction( ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDUCTION1:%.*]], %vector.body ] ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[INDUCTION:%.*]] = add i32 [[TMP3]], 0 -; CHECK-NEXT: [[INDUCTION1]] = add i32 [[TMP3]], 1 +; CHECK-NEXT: [[INDUCTION1:%.*]] = add i32 [[TMP3]], 1 ; CHECK-NEXT: store i32 [[INDUCTION]], ptr [[DST]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], %n.vec diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index c7149b0..ebddca2 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -621,8 +621,7 @@ define i32 @pr45526_pgso() !prof !14 { ; NPGSO: [[VECTOR_BODY]]: ; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; NPGSO-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; NPGSO-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 5>, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] -; NPGSO-NEXT: [[TMP0]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1) +; NPGSO-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1) ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 
4 ; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index e1b2646..a70d8f7 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -580,14 +580,9 @@ define i64 @constant_folded_previous_value() { ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]] ; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF4UF1-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = mul i32 [[TMP9]], 4 -; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 0, i32 [[TMP8]] ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4UF1: [[VECTOR_BODY]]: ; CHECK-VF4UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ splat (i64 1), %[[VECTOR_BODY]] ] ; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -613,14 +608,9 @@ define i64 @constant_folded_previous_value() { ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]] ; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = mul i32 [[TMP9]], 4 -; CHECK-VF4UF2-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 0, i32 [[TMP8]] ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4UF2: [[VECTOR_BODY]]: ; CHECK-VF4UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ splat (i64 1), %[[VECTOR_BODY]] ] ; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -676,16 +666,11 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = mul i32 1, [[TMP5]] ; CHECK-VF4UF1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP8]], i64 0 ; CHECK-VF4UF1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -; CHECK-VF4UF1-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 4 -; CHECK-VF4UF1-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 -; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP19]] ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4UF1: [[VECTOR_BODY]]: ; 
CHECK-VF4UF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4UF1-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4UF1-NEXT: [[TMP9]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-VF4UF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; CHECK-VF4UF1-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-VF4UF1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -729,19 +714,14 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF2-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]] ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -; CHECK-VF4UF2-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 4 -; CHECK-VF4UF2-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 -; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP19]] ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4UF2: [[VECTOR_BODY]]: ; CHECK-VF4UF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4UF2-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4UF2-NEXT: [[STEP_ADD:%.*]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; CHECK-VF4UF2-NEXT: [[TMP9]] = add <vscale x 4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]] +; CHECK-VF4UF2-NEXT: [[STEP_ADD:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT1:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4UF2-NEXT: [[VEC_IND_NEXT:%.*]] = add <vscale x 4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT2]] +; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[VEC_IND_NEXT]], [[BROADCAST_SPLAT]] ; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]] -; CHECK-VF4UF2-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT2]] +; CHECK-VF4UF2-NEXT: [[VEC_IND_NEXT1]] = add <vscale x 4 x i32> [[VEC_IND_NEXT]], [[BROADCAST_SPLAT2]] ; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4UF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-VF4UF2: [[MIDDLE_BLOCK]]: |