From 8578b6e9120cb2fc8a7fc102587d617144c35532 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 18 Mar 2024 19:23:36 +0000 Subject: [VPlan] Store VPlan directly in VPRecipeBuilder (NFCI). Instead of passing VPlan in a number of places, just store it directly in VPRecipeBuilder. A single instance is only used for a single VPlan. This simplifies the code and was suggested by @nikolaypanchenko in https://github.com/llvm/llvm-project/pull/84464. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 132 ++++++++++++------------ llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 35 +++---- 2 files changed, 83 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 52b992b..2163930 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7898,8 +7898,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF, } } -VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst, - VPlan &Plan) { +VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { assert(is_contained(predecessors(Dst), Src) && "Invalid edge"); // Look for cached value. @@ -7954,7 +7953,7 @@ VPValue *VPRecipeBuilder::getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const { return ECEntryIt->second; } -void VPRecipeBuilder::createHeaderMask(VPlan &Plan) { +void VPRecipeBuilder::createHeaderMask() { BasicBlock *Header = OrigLoop->getHeader(); // When not folding the tail, use nullptr to model all-true mask. @@ -7989,7 +7988,7 @@ VPValue *VPRecipeBuilder::getBlockInMask(BasicBlock *BB) const { return BCEntryIt->second; } -void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { +void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) { assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); assert(BlockMaskCache.count(BB) == 0 && "Mask for block already computed"); assert(OrigLoop->getHeader() != BB && @@ -8000,7 +7999,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { VPValue *BlockMask = nullptr; // This is the block mask. We OR all incoming edges. for (auto *Predecessor : predecessors(BB)) { - VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan); + VPValue *EdgeMask = createEdgeMask(Predecessor, BB); if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too. BlockMaskCache[BB] = EdgeMask; return; @@ -8019,7 +8018,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { VPWidenMemoryInstructionRecipe * VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef Operands, - VFRange &Range, VPlanPtr &Plan) { + VFRange &Range) { assert((isa(I) || isa(I)) && "Must be called with either a load or store"); @@ -8092,7 +8091,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc, } VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( - PHINode *Phi, ArrayRef Operands, VPlan &Plan, VFRange &Range) { + PHINode *Phi, ArrayRef Operands, VFRange &Range) { // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. @@ -8116,7 +8115,7 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( } VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate( - TruncInst *I, ArrayRef Operands, VFRange &Range, VPlan &Plan) { + TruncInst *I, ArrayRef Operands, VFRange &Range) { // Optimize the special case where the source is a constant integer // induction variable. Notice that we can only optimize the 'trunc' case // because (a) FP conversions lose precision, (b) sext/zext may wrap, and @@ -8144,8 +8143,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate( } VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, - ArrayRef Operands, - VPlanPtr &Plan) { + ArrayRef Operands) { unsigned NumIncoming = Phi->getNumIncomingValues(); // We know that all PHIs in non-header blocks are converted into selects, so @@ -8158,7 +8156,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, for (unsigned In = 0; In < NumIncoming; In++) { OperandsWithMask.push_back(Operands[In]); VPValue *EdgeMask = - createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan); + createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent()); if (!EdgeMask) { assert(In == 0 && "Both null and non-null edge masks found"); assert(all_equal(Operands) && @@ -8172,8 +8170,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, ArrayRef Operands, - VFRange &Range, - VPlanPtr &Plan) { + VFRange &Range) { bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( [this, CI](ElementCount VF) { return CM.isScalarWithPredication(CI, VF); @@ -8248,7 +8245,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, if (Legal->isMaskRequired(CI)) Mask = getBlockInMask(CI->getParent()); else - Mask = Plan->getVPValueOrAddLiveIn(ConstantInt::getTrue( + Mask = Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue( IntegerType::getInt1Ty(Variant->getFunctionType()->getContext()))); Ops.insert(Ops.begin() + *MaskPos, Mask); @@ -8278,7 +8275,7 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const { VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, ArrayRef Operands, - VPBasicBlock *VPBB, VPlanPtr &Plan) { + VPBasicBlock *VPBB) { switch (I->getOpcode()) { default: return nullptr; @@ -8291,8 +8288,8 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, if (CM.isPredicatedInst(I)) { SmallVector Ops(Operands.begin(), Operands.end()); VPValue *Mask = getBlockInMask(I->getParent()); - VPValue *One = Plan->getVPValueOrAddLiveIn( - ConstantInt::get(I->getType(), 1u, false)); + VPValue *One = + Plan.getVPValueOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false)); auto *SafeRHS = new VPInstruction(Instruction::Select, {Mask, Ops[1], One}, I->getDebugLoc()); @@ -8336,8 +8333,7 @@ void VPRecipeBuilder::fixHeaderPhis() { } VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I, - VFRange &Range, - VPlan &Plan) { + VFRange &Range) { bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange( [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); }, Range); @@ -8392,21 +8388,22 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I, return Recipe; } -VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( - Instruction *Instr, ArrayRef Operands, VFRange &Range, - VPBasicBlock *VPBB, VPlanPtr &Plan) { +VPRecipeBase * +VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, + ArrayRef Operands, + VFRange &Range, VPBasicBlock *VPBB) { // First, check for specific widening recipes that deal with inductions, Phi // nodes, calls and memory operations. VPRecipeBase *Recipe; if (auto Phi = dyn_cast(Instr)) { if (Phi->getParent() != OrigLoop->getHeader()) - return tryToBlend(Phi, Operands, Plan); + return tryToBlend(Phi, Operands); // Always record recipes for header phis. Later first-order recurrence phis // can have earlier phis as incoming values. recordRecipeOf(Phi); - if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range))) + if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range))) return Recipe; VPHeaderPHIRecipe *PhiRecipe = nullptr; @@ -8442,9 +8439,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( return PhiRecipe; } - if (isa(Instr) && - (Recipe = tryToOptimizeInductionTruncate(cast(Instr), Operands, - Range, *Plan))) + if (isa(Instr) && (Recipe = tryToOptimizeInductionTruncate( + cast(Instr), Operands, Range))) return Recipe; // All widen recipes below deal only with VF > 1. @@ -8453,10 +8449,10 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( return nullptr; if (auto *CI = dyn_cast(Instr)) - return tryToWidenCall(CI, Operands, Range, Plan); + return tryToWidenCall(CI, Operands, Range); if (isa(Instr) || isa(Instr)) - return tryToWidenMemory(Instr, Operands, Range, Plan); + return tryToWidenMemory(Instr, Operands, Range); if (!shouldWiden(Instr, Range)) return nullptr; @@ -8475,7 +8471,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( *CI); } - return tryToWiden(Instr, Operands, VPBB, Plan); + return tryToWiden(Instr, Operands, VPBB); } void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, @@ -8547,37 +8543,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { SmallPtrSet *, 1> InterleaveGroups; - VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder); - - // --------------------------------------------------------------------------- - // Pre-construction: record ingredients whose recipes we'll need to further - // process after constructing the initial VPlan. - // --------------------------------------------------------------------------- - - // For each interleave group which is relevant for this (possibly trimmed) - // Range, add it to the set of groups to be later applied to the VPlan and add - // placeholders for its members' Recipes which we'll be replacing with a - // single VPInterleaveRecipe. - for (InterleaveGroup *IG : IAI.getInterleaveGroups()) { - auto applyIG = [IG, this](ElementCount VF) -> bool { - bool Result = (VF.isVector() && // Query is illegal for VF == 1 - CM.getWideningDecision(IG->getInsertPos(), VF) == - LoopVectorizationCostModel::CM_Interleave); - // For scalable vectors, the only interleave factor currently supported - // is 2 since we require the (de)interleave2 intrinsics instead of - // shufflevectors. - assert((!Result || !VF.isScalable() || IG->getFactor() == 2) && - "Unsupported interleave factor for scalable vectors"); - return Result; - }; - if (!getDecisionAndClampRange(applyIG, Range)) - continue; - InterleaveGroups.insert(IG); - for (unsigned i = 0; i < IG->getFactor(); i++) - if (Instruction *Member = IG->getMember(i)) - RecipeBuilder.recordRecipeOf(Member); - }; - // --------------------------------------------------------------------------- // Build initial VPlan: Scan the body of the loop in a topological order to // visit each basic block after having visited its predecessor basic blocks. @@ -8612,6 +8577,41 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { bool HasNUW = Style == TailFoldingStyle::None; addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL); + VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder); + + // --------------------------------------------------------------------------- + // Pre-construction: record ingredients whose recipes we'll need to further + // process after constructing the initial VPlan. + // --------------------------------------------------------------------------- + + // For each interleave group which is relevant for this (possibly trimmed) + // Range, add it to the set of groups to be later applied to the VPlan and add + // placeholders for its members' Recipes which we'll be replacing with a + // single VPInterleaveRecipe. + for (InterleaveGroup *IG : IAI.getInterleaveGroups()) { + auto applyIG = [IG, this](ElementCount VF) -> bool { + bool Result = (VF.isVector() && // Query is illegal for VF == 1 + CM.getWideningDecision(IG->getInsertPos(), VF) == + LoopVectorizationCostModel::CM_Interleave); + // For scalable vectors, the only interleave factor currently supported + // is 2 since we require the (de)interleave2 intrinsics instead of + // shufflevectors. + assert((!Result || !VF.isScalable() || IG->getFactor() == 2) && + "Unsupported interleave factor for scalable vectors"); + return Result; + }; + if (!getDecisionAndClampRange(applyIG, Range)) + continue; + InterleaveGroups.insert(IG); + for (unsigned i = 0; i < IG->getFactor(); i++) + if (Instruction *Member = IG->getMember(i)) + RecipeBuilder.recordRecipeOf(Member); + }; + + // --------------------------------------------------------------------------- + // Construct recipes for the instructions in the loop + // --------------------------------------------------------------------------- + // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); @@ -8633,9 +8633,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { Builder.setInsertPoint(VPBB); if (VPBB == HeaderVPBB) - RecipeBuilder.createHeaderMask(*Plan); + RecipeBuilder.createHeaderMask(); else if (NeedsMasks) - RecipeBuilder.createBlockInMask(BB, *Plan); + RecipeBuilder.createBlockInMask(BB); // Introduce each ingredient into VPlan. // TODO: Model and preserve debug intrinsics in VPlan. @@ -8658,10 +8658,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) continue; - VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe( - Instr, Operands, Range, VPBB, Plan); + VPRecipeBase *Recipe = + RecipeBuilder.tryToCreateWidenRecipe(Instr, Operands, Range, VPBB); if (!Recipe) - Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan); + Recipe = RecipeBuilder.handleReplication(Instr, Range); for (auto *Def : Recipe->definedValues()) { auto *UV = Def->getUnderlyingValue(); Plan->addVPValue(UV, Def); diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index b149802..29a395c 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -23,6 +23,9 @@ class TargetLibraryInfo; /// Helper class to create VPRecipies from IR instructions. class VPRecipeBuilder { + /// The VPlan new recipes are added to. + VPlan &Plan; + /// The loop that we evaluate. Loop *OrigLoop; @@ -69,53 +72,50 @@ class VPRecipeBuilder { /// recipe that takes an additional VPInstruction for the mask. VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, ArrayRef Operands, - VFRange &Range, - VPlanPtr &Plan); + VFRange &Range); /// Check if an induction recipe should be constructed for \p Phi. If so build /// and return it. If not, return null. VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef Operands, - VPlan &Plan, VFRange &Range); + VFRange &Range); /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. VPWidenIntOrFpInductionRecipe * tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef Operands, - VFRange &Range, VPlan &Plan); + VFRange &Range); /// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently /// all such phi nodes are turned into a sequence of select instructions as /// the vectorizer currently performs full if-conversion. - VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef Operands, - VPlanPtr &Plan); + VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef Operands); /// Handle call instructions. If \p CI can be widened for \p Range.Start, /// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same /// decision from \p Range.Start to \p Range.End. VPWidenCallRecipe *tryToWidenCall(CallInst *CI, ArrayRef Operands, - VFRange &Range, VPlanPtr &Plan); + VFRange &Range); /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe /// if it can. The function should only be called if the cost-model indicates /// that widening should be performed. VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef Operands, - VPBasicBlock *VPBB, VPlanPtr &Plan); + VPBasicBlock *VPBB); public: - VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI, + VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, PredicatedScalarEvolution &PSE, VPBuilder &Builder) - : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE), - Builder(Builder) {} + : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), + PSE(PSE), Builder(Builder) {} /// Create and return a widened recipe for \p I if one can be created within /// the given VF \p Range. VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr, ArrayRef Operands, - VFRange &Range, VPBasicBlock *VPBB, - VPlanPtr &Plan); + VFRange &Range, VPBasicBlock *VPBB); /// Set the recipe created for given ingredient. This operation is a no-op for /// ingredients that were not marked using a nullptr entry in the map. @@ -128,19 +128,19 @@ public: } /// Create the mask for the vector loop header block. - void createHeaderMask(VPlan &Plan); + void createHeaderMask(); /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True or the loop mask when /// tail folding. - void createBlockInMask(BasicBlock *BB, VPlan &Plan); + void createBlockInMask(BasicBlock *BB); /// Returns the *entry* mask for the block \p BB. VPValue *getBlockInMask(BasicBlock *BB) const; /// A helper function that computes the predicate of the edge between SRC /// and DST. - VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlan &Plan); + VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst); /// A helper that returns the previously computed predicate of the edge /// between SRC and DST. @@ -166,8 +166,7 @@ public: /// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as /// last operand. Range.End may be decreased to ensure same recipe behavior /// from \p Range.Start to \p Range.End. - VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range, - VPlan &Plan); + VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range); /// Add the incoming values from the backedge to reduction & first-order /// recurrence cross-iteration phis. -- cgit v1.1