diff options
author | Florian Hahn <flo@fhahn.com> | 2024-01-29 09:52:05 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-29 09:52:05 +0000 |
commit | 743946e8ef0d164cbaa3409d11b218e299ccd35e (patch) | |
tree | 56bb87b5499fc28254cc57cd06ad66f1df33bb69 | |
parent | ce8032394fa4ff55c36d857e85241c1bd0cacc60 (diff) | |
download | llvm-743946e8ef0d164cbaa3409d11b218e299ccd35e.zip llvm-743946e8ef0d164cbaa3409d11b218e299ccd35e.tar.gz llvm-743946e8ef0d164cbaa3409d11b218e299ccd35e.tar.bz2 |
[VPlan] Replace VPRecipeOrVPValue with VP2VP recipe simplification. (#76090)
Move simplification of VPBlendRecipes from early VPlan construction to
VPlan-to-VPlan based recipe simplification. This simplifies initial
construction.
Note that some in-loop reduction tests are failing at the moment, due to
the reduction predicate being created after the reduction recipe. I will
provide a patch for that soon.
PR: https://github.com/llvm/llvm-project/pull/76090
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 133 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 50 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 13 |
3 files changed, 92 insertions, 104 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3aff0a0..3483e2c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8112,10 +8112,9 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { BlockMaskCache[BB] = BlockMask; } -VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, - ArrayRef<VPValue *> Operands, - VFRange &Range, - VPlanPtr &Plan) { +VPWidenMemoryInstructionRecipe * +VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands, + VFRange &Range, VPlanPtr &Plan) { assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Must be called with either a load or store"); @@ -8187,7 +8186,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc, return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc); } -VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI( +VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI( PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) { // Check if this is an integer or fp induction. If so, build the recipe that @@ -8239,31 +8238,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate( return nullptr; } -VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, - ArrayRef<VPValue *> Operands, - VPlanPtr &Plan) { - // If all incoming values are equal, the incoming VPValue can be used directly - // instead of creating a new VPBlendRecipe. - if (llvm::all_equal(Operands)) - return Operands[0]; - +VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, + ArrayRef<VPValue *> Operands, + VPlanPtr &Plan) { unsigned NumIncoming = Phi->getNumIncomingValues(); - // For in-loop reductions, we do not need to create an additional select. - VPValue *InLoopVal = nullptr; - for (unsigned In = 0; In < NumIncoming; In++) { - PHINode *PhiOp = - dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue()); - if (PhiOp && CM.isInLoopReduction(PhiOp)) { - assert(!InLoopVal && "Found more than one in-loop reduction!"); - InLoopVal = Operands[In]; - } - } - - assert((!InLoopVal || NumIncoming == 2) && - "Found an in-loop reduction for PHI with unexpected number of " - "incoming values"); - if (InLoopVal) - return Operands[Operands[0] == InLoopVal ? 1 : 0]; // We know that all PHIs in non-header blocks are converted into selects, so // we don't have to worry about the insertion order and we can just use the @@ -8273,15 +8251,18 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, SmallVector<VPValue *, 2> OperandsWithMask; for (unsigned In = 0; In < NumIncoming; In++) { + OperandsWithMask.push_back(Operands[In]); VPValue *EdgeMask = createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan); - assert((EdgeMask || NumIncoming == 1) && - "Multiple predecessors with one having a full mask"); - OperandsWithMask.push_back(Operands[In]); - if (EdgeMask) - OperandsWithMask.push_back(EdgeMask); + if (!EdgeMask) { + assert(In == 0 && "Both null and non-null edge masks found"); + assert(all_equal(Operands) && + "Distinct incoming values with one having a full mask"); + break; + } + OperandsWithMask.push_back(EdgeMask); } - return toVPRecipeResult(new VPBlendRecipe(Phi, OperandsWithMask)); + return new VPBlendRecipe(Phi, OperandsWithMask); } VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, @@ -8390,9 +8371,9 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const { Range); } -VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I, - ArrayRef<VPValue *> Operands, - VPBasicBlock *VPBB, VPlanPtr &Plan) { +VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, + ArrayRef<VPValue *> Operands, + VPBasicBlock *VPBB, VPlanPtr &Plan) { switch (I->getOpcode()) { default: return nullptr; @@ -8449,9 +8430,9 @@ void VPRecipeBuilder::fixHeaderPhis() { } } -VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I, - VFRange &Range, - VPlan &Plan) { +VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I, + VFRange &Range, + VPlan &Plan) { bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange( [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); }, Range); @@ -8503,14 +8484,12 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I, auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()), IsUniform, BlockInMask); - return toVPRecipeResult(Recipe); + return Recipe; } -VPRecipeOrVPValueTy -VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, - ArrayRef<VPValue *> Operands, - VFRange &Range, VPBasicBlock *VPBB, - VPlanPtr &Plan) { +VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( + Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range, + VPBasicBlock *VPBB, VPlanPtr &Plan) { // First, check for specific widening recipes that deal with inductions, Phi // nodes, calls and memory operations. VPRecipeBase *Recipe; @@ -8523,7 +8502,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, recordRecipeOf(Phi); if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range))) - return toVPRecipeResult(Recipe); + return Recipe; VPHeaderPHIRecipe *PhiRecipe = nullptr; assert((Legal->isReductionVariable(Phi) || @@ -8555,13 +8534,13 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, recordRecipeOf(Inc); PhisToFix.push_back(PhiRecipe); - return toVPRecipeResult(PhiRecipe); + return PhiRecipe; } if (isa<TruncInst>(Instr) && (Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands, Range, *Plan))) - return toVPRecipeResult(Recipe); + return Recipe; // All widen recipes below deal only with VF > 1. if (LoopVectorizationPlanner::getDecisionAndClampRange( @@ -8569,29 +8548,29 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, return nullptr; if (auto *CI = dyn_cast<CallInst>(Instr)) - return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan)); + return tryToWidenCall(CI, Operands, Range, Plan); if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr)) - return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan)); + return tryToWidenMemory(Instr, Operands, Range, Plan); if (!shouldWiden(Instr, Range)) return nullptr; if (auto GEP = dyn_cast<GetElementPtrInst>(Instr)) - return toVPRecipeResult(new VPWidenGEPRecipe( - GEP, make_range(Operands.begin(), Operands.end()))); + return new VPWidenGEPRecipe(GEP, + make_range(Operands.begin(), Operands.end())); if (auto *SI = dyn_cast<SelectInst>(Instr)) { - return toVPRecipeResult(new VPWidenSelectRecipe( - *SI, make_range(Operands.begin(), Operands.end()))); + return new VPWidenSelectRecipe( + *SI, make_range(Operands.begin(), Operands.end())); } if (auto *CI = dyn_cast<CastInst>(Instr)) { - return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0], - CI->getType(), *CI)); + return new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), + *CI); } - return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan)); + return tryToWiden(Instr, Operands, VPBB, Plan); } void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, @@ -8779,22 +8758,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) continue; - auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe( + VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe( Instr, Operands, Range, VPBB, Plan); - if (!RecipeOrValue) - RecipeOrValue = RecipeBuilder.handleReplication(Instr, Range, *Plan); - // If Instr can be simplified to an existing VPValue, use it. - if (isa<VPValue *>(RecipeOrValue)) { - auto *VPV = cast<VPValue *>(RecipeOrValue); - Plan->addVPValue(Instr, VPV); - // If the re-used value is a recipe, register the recipe for the - // instruction, in case the recipe for Instr needs to be recorded. - if (VPRecipeBase *R = VPV->getDefiningRecipe()) - RecipeBuilder.setRecipe(Instr, R); - continue; - } - // Otherwise, add the new recipe. - VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue); + if (!Recipe) + Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan); for (auto *Def : Recipe->definedValues()) { auto *UV = Def->getUnderlyingValue(); Plan->addVPValue(UV, Def); @@ -9041,7 +9008,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // the phi until LoopExitValue. We keep track of the previous item // (PreviousLink) to tell which of the two operands of a Link will remain // scalar and which will be reduced. For minmax by select(cmp), Link will be - // the select instructions. + // the select instructions. Blend recipes of in-loop reduction phi's will + // get folded to their non-phi operand, as the reduction recipe handles the + // condition directly. VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0]. for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) { Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr(); @@ -9072,6 +9041,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator()); VecOp = FMulRecipe; } else { + auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink); + if (PhiR->isInLoop() && Blend) { + assert(Blend->getNumIncomingValues() == 2 && + "Blend must have 2 incoming values"); + if (Blend->getIncomingValue(0) == PhiR) + Blend->replaceAllUsesWith(Blend->getIncomingValue(1)); + else { + assert(Blend->getIncomingValue(1) == PhiR && + "PhiR must be an operand of the blend"); + Blend->replaceAllUsesWith(Blend->getIncomingValue(0)); + } + continue; + } + if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { if (isa<VPWidenRecipe>(CurrentLink)) { assert(isa<CmpInst>(CurrentLinkI) && diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 5645cfa1..b149802 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -21,8 +21,6 @@ class LoopVectorizationLegality; class LoopVectorizationCostModel; class TargetLibraryInfo; -using VPRecipeOrVPValueTy = PointerUnion<VPRecipeBase *, VPValue *>; - /// Helper class to create VPRecipies from IR instructions. class VPRecipeBuilder { /// The loop that we evaluate. @@ -69,14 +67,16 @@ class VPRecipeBuilder { /// Check if the load or store instruction \p I should widened for \p /// Range.Start and potentially masked. Such instructions are handled by a /// recipe that takes an additional VPInstruction for the mask. - VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands, - VFRange &Range, VPlanPtr &Plan); + VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, + ArrayRef<VPValue *> Operands, + VFRange &Range, + VPlanPtr &Plan); /// Check if an induction recipe should be constructed for \p Phi. If so build /// and return it. If not, return null. - VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi, - ArrayRef<VPValue *> Operands, - VPlan &Plan, VFRange &Range); + VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi, + ArrayRef<VPValue *> Operands, + VPlan &Plan, VFRange &Range); /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. @@ -84,12 +84,11 @@ class VPRecipeBuilder { tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan); - /// Handle non-loop phi nodes. Return a VPValue, if all incoming values match - /// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned - /// into a sequence of select instructions as the vectorizer currently - /// performs full if-conversion. - VPRecipeOrVPValueTy tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands, - VPlanPtr &Plan); + /// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently + /// all such phi nodes are turned into a sequence of select instructions as + /// the vectorizer currently performs full if-conversion. + VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands, + VPlanPtr &Plan); /// Handle call instructions. If \p CI can be widened for \p Range.Start, /// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same @@ -100,11 +99,8 @@ class VPRecipeBuilder { /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe /// if it can. The function should only be called if the cost-model indicates /// that widening should be performed. - VPRecipeBase *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands, - VPBasicBlock *VPBB, VPlanPtr &Plan); - - /// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be used to force the use as VPRecipeBase* for recipe sub-types that also inherit from VPValue. - VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; } + VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands, + VPBasicBlock *VPBB, VPlanPtr &Plan); public: VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI, @@ -114,14 +110,12 @@ public: : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE), Builder(Builder) {} - /// Check if an existing VPValue can be used for \p Instr or a recipe can be - /// create for \p I withing the given VF \p Range. If an existing VPValue can - /// be used or if a recipe can be created, return it. Otherwise return a - /// VPRecipeOrVPValueTy with nullptr. - VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr, - ArrayRef<VPValue *> Operands, - VFRange &Range, VPBasicBlock *VPBB, - VPlanPtr &Plan); + /// Create and return a widened recipe for \p I if one can be created within + /// the given VF \p Range. + VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr, + ArrayRef<VPValue *> Operands, + VFRange &Range, VPBasicBlock *VPBB, + VPlanPtr &Plan); /// Set the recipe created for given ingredient. This operation is a no-op for /// ingredients that were not marked using a nullptr entry in the map. @@ -172,8 +166,8 @@ public: /// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as /// last operand. Range.End may be decreased to ensure same recipe behavior /// from \p Range.Start to \p Range.End. - VPRecipeOrVPValueTy handleReplication(Instruction *I, VFRange &Range, - VPlan &Plan); + VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range, + VPlan &Plan); /// Add the incoming values from the backedge to reduction & first-order /// recurrence cross-iteration phis. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b4d913d..5d7ac2b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -827,6 +827,17 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) { /// Try to simplify recipe \p R. static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { + // Try to remove redundant blend recipes. + if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) { + VPValue *Inc0 = Blend->getIncomingValue(0); + for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I) + if (Inc0 != Blend->getIncomingValue(I)) + return; + Blend->replaceAllUsesWith(Inc0); + Blend->eraseFromParent(); + return; + } + switch (getOpcodeForRecipe(R)) { case Instruction::Mul: { VPValue *A = R.getOperand(0); @@ -1031,8 +1042,8 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) { removeRedundantCanonicalIVs(Plan); removeRedundantInductionCasts(Plan); - optimizeInductions(Plan, SE); simplifyRecipes(Plan, SE.getContext()); + optimizeInductions(Plan, SE); removeDeadRecipes(Plan); createAndOptimizeReplicateRegions(Plan); |