diff options
author | Florian Hahn <flo@fhahn.com> | 2025-04-26 22:03:18 +0100 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2025-04-26 22:03:18 +0100 |
commit | 826f237cb4b44f6d95d66eea0e58f7d24585986b (patch) | |
tree | 63ea23edb2c2736517d3b9620046a5d1c156c257 | |
parent | d403c70ff31830bf5311be11b51363ae98d06068 (diff) | |
download | llvm-826f237cb4b44f6d95d66eea0e58f7d24585986b.zip llvm-826f237cb4b44f6d95d66eea0e58f7d24585986b.tar.gz llvm-826f237cb4b44f6d95d66eea0e58f7d24585986b.tar.bz2 |
[VPlan] Don't add separate vector latch block (NFC).
Simplify initial VPlan construction by not creating a separate
vector.latch block, which isn't needed and will get folded away later.
This has been suggested as an independent clean-up multiple times.
4 files changed, 46 insertions, 65 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4beeabd..6b5b50a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9531,14 +9531,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi(); VPBlockBase *PrevVPBB = nullptr; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { - // Handle VPBBs down to the latch. - if (VPBB == LoopRegion->getExiting()) { - assert(!VPB2IRBB.contains(VPBB) && - "the latch block shouldn't have a corresponding IRBB"); - VPBlockUtils::connectBlocks(PrevVPBB, VPBB); - break; - } - // Create mask based on the IR BB corresponding to VPBB. // TODO: Predicate directly based on VPlan. Builder.setInsertPoint(VPBB, VPBB->begin()); @@ -9761,6 +9753,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { for (ElementCount VF : Range) Plan->addVF(VF); + // Tail folding is not supported for outer loops, so the induction increment + // is guaranteed to not wrap. + bool HasNUW = true; + addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, + DebugLoc()); + if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( Plan, [this](PHINode *P) { @@ -9769,12 +9767,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { *PSE.getSE(), *TLI)) return nullptr; - // Tail folding is not supported for outer loops, so the induction increment - // is guaranteed to not wrap. - bool HasNUW = true; - addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, - DebugLoc()); - // Collect mapping of IR header phis to header phi recipes, to be used in // addScalarResumePhis. 
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE, @@ -9939,14 +9931,18 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // ensure that it comes after all of it's inputs, including CondOp. // Delete CurrentLink as it will be invalid if its operand is replaced // with a reduction defined at the bottom of the block in the next link. - LinkVPBB->appendRecipe(RedRecipe); + if (LinkVPBB->getNumSuccessors() == 0) + RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end()))); + else + LinkVPBB->appendRecipe(RedRecipe); + CurrentLink->replaceAllUsesWith(RedRecipe); ToDelete.push_back(CurrentLink); PreviousLink = RedRecipe; } } VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock(); - Builder.setInsertPoint(&*LatchVPBB->begin()); + Builder.setInsertPoint(&*std::prev(std::prev(LatchVPBB->end()))); VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi(); for (VPRecipeBase &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { @@ -9966,8 +9962,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (!PhiR->isInLoop() && CM.foldTailByMasking() && !isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) { VPValue *Cond = RecipeBuilder.getBlockInMask(OrigLoop->getHeader()); - assert(OrigExitingVPV->getDefiningRecipe()->getParent() != LatchVPBB && - "reduction recipe must be defined before latch"); Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); std::optional<FastMathFlags> FMFs = PhiTy->isFloatingPointTy() diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index f24d422..b374371 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -451,10 +451,6 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy, createLoopRegion(Plan, HeaderVPB); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); - auto *OrigExiting = 
TopRegion->getExiting(); - VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch"); - VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting); - TopRegion->setExiting(LatchVPBB); TopRegion->setName("vector loop"); TopRegion->getEntryBasicBlock()->setName("vector.body"); @@ -472,7 +468,6 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy, VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph"); VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry()); - VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block"); VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 95dfeea..fa16a64 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -55,6 +55,9 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( make_early_inc_range(make_range(VPBB->begin(), EndIter))) { VPValue *VPV = Ingredient.getVPSingleValue(); + if (!VPV->getUnderlyingValue()) + continue; + Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue()); VPRecipeBase *NewRecipe = nullptr; @@ -387,9 +390,13 @@ static void addReplicateRegions(VPlan &Plan) { SplitBlock->setName( OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : ""); // Record predicated instructions for above packing optimizations. 
- VPBlockBase *Region = createReplicateRegion(RepR, Plan); + VPRegionBlock *Region = createReplicateRegion(RepR, Plan); Region->setParent(CurrentBlock->getParent()); VPBlockUtils::insertOnEdge(CurrentBlock, SplitBlock, Region); + + VPRegionBlock *ParentRegion = Region->getParent(); + if (ParentRegion && ParentRegion->getExiting() == CurrentBlock) + ParentRegion->setExiting(SplitBlock); } } diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index be98e86..11abdf7 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -53,14 +53,10 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) { VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock(); EXPECT_EQ(7u, VecBB->size()); EXPECT_EQ(0u, VecBB->getNumPredecessors()); - EXPECT_EQ(1u, VecBB->getNumSuccessors()); + EXPECT_EQ(0u, VecBB->getNumSuccessors()); EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB); EXPECT_EQ(&*Plan, VecBB->getPlan()); - VPBlockBase *VecLatch = VecBB->getSingleSuccessor(); - EXPECT_EQ(VecLatch->getParent()->getExitingBasicBlock(), VecLatch); - EXPECT_EQ(0u, VecLatch->getNumSuccessors()); - auto Iter = VecBB->begin(); VPWidenPHIRecipe *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++); EXPECT_NE(nullptr, Phi); @@ -130,33 +126,28 @@ compound=true " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" + " EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" + - "Successor(s): vector.latch\l" - ] - N2 -> N4 [ label=""] - N4 [label = - "vector.latch:\l" + "No successors\l" ] } - N4 -> N5 [ label="" ltail=cluster_N3] - N5 [label = + N2 -> N4 [ label="" ltail=cluster_N3] + N4 [label = "middle.block:\l" + " EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" + " EMIT branch-on-cond vp\<%cmp.n\>\l" + "Successor(s): ir-bb\<for.end\>, scalar.ph\l" ] - N5 -> N6 [ label="T"] - N5 -> N7 [ 
label="F"] - N6 [label = + N4 -> N5 [ label="T"] + N4 -> N6 [ label="F"] + N5 [label = "ir-bb\<for.end\>:\l" + "No successors\l" ] - N7 [label = + N6 [label = "scalar.ph:\l" + "Successor(s): ir-bb\<for.body\>\l" ] - N7 -> N8 [ label=""] - N8 [label = + N6 -> N7 [ label=""] + N7 [label = "ir-bb\<for.body\>:\l" + " IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\l" + " IR %arr.idx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv\l" + @@ -171,10 +162,6 @@ compound=true )"; EXPECT_EQ(ExpectedStr, FullDump); #endif - TargetLibraryInfoImpl TLII(M.getTargetTriple()); - TargetLibraryInfo TLI(TLII); - VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( - Plan, [](PHINode *P) { return nullptr; }, *SE, TLI); } TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { @@ -203,6 +190,12 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { TargetLibraryInfoImpl TLII(M.getTargetTriple()); TargetLibraryInfo TLI(TLII); + // Current VPlan construction doesn't add a terminator for top-level loop + // latches. Add it before running transform. + cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getExiting()) + ->appendRecipe(new VPInstruction( + VPInstruction::BranchOnCond, + {Plan->getOrAddLiveIn(ConstantInt::getTrue(F->getContext()))})); VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( Plan, [](PHINode *P) { return nullptr; }, *SE, TLI); @@ -214,15 +207,11 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { // Check that the region following the preheader consists of a block for the // original header and a separate latch. 
VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock(); - EXPECT_EQ(7u, VecBB->size()); + EXPECT_EQ(8u, VecBB->size()); EXPECT_EQ(0u, VecBB->getNumPredecessors()); - EXPECT_EQ(1u, VecBB->getNumSuccessors()); + EXPECT_EQ(0u, VecBB->getNumSuccessors()); EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB); - VPBlockBase *VecLatch = VecBB->getSingleSuccessor(); - EXPECT_EQ(VecLatch->getParent()->getExitingBasicBlock(), VecLatch); - EXPECT_EQ(0u, VecLatch->getNumSuccessors()); - auto Iter = VecBB->begin(); EXPECT_NE(nullptr, dyn_cast<VPWidenPHIRecipe>(&*Iter++)); EXPECT_NE(nullptr, dyn_cast<VPWidenGEPRecipe>(&*Iter++)); @@ -231,6 +220,7 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryRecipe>(&*Iter++)); EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++)); EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++)); + EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++)); EXPECT_EQ(VecBB->end(), Iter); } @@ -303,33 +293,28 @@ compound=true " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" + " EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" + - "Successor(s): vector.latch\l" - ] - N4 -> N5 [ label=""] - N5 [label = - "vector.latch:\l" + "No successors\l" ] } - N5 -> N6 [ label="" ltail=cluster_N3] - N6 [label = + N4 -> N5 [ label="" ltail=cluster_N3] + N5 [label = "middle.block:\l" + " EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" + " EMIT branch-on-cond vp\<%cmp.n\>\l" + "Successor(s): ir-bb\<exit.2\>, scalar.ph\l" ] - N6 -> N7 [ label="T"] - N6 -> N8 [ label="F"] - N7 [label = + N5 -> N6 [ label="T"] + N5 -> N7 [ label="F"] + N6 [label = "ir-bb\<exit.2\>:\l" + "No successors\l" ] - N8 [label = + N7 [label = "scalar.ph:\l" + "Successor(s): ir-bb\<loop.header\>\l" ] - N8 -> N9 [ label=""] - N9 [label = + N7 -> N8 [ label=""] + N8 [label = "ir-bb\<loop.header\>:\l" + " IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]\l" + " IR 
%arr.idx = getelementptr inbounds i32, ptr %A, i64 %iv\l" + |