aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Hahn <flo@fhahn.com>2024-03-18 19:23:36 +0000
committerFlorian Hahn <flo@fhahn.com>2024-03-18 19:23:37 +0000
commit8578b6e9120cb2fc8a7fc102587d617144c35532 (patch)
treed18ee1f7b1462a44c9f4869cc802bc16a826243d
parent20f5bcfb1a3b58c430a5df6dd837b30c22a2b788 (diff)
downloadllvm-8578b6e9120cb2fc8a7fc102587d617144c35532.zip
llvm-8578b6e9120cb2fc8a7fc102587d617144c35532.tar.gz
llvm-8578b6e9120cb2fc8a7fc102587d617144c35532.tar.bz2
[VPlan] Store VPlan directly in VPRecipeBuilder (NFCI).
Instead of passing VPlan in a number of places, just store it directly in VPRecipeBuilder. A single instance is only used for a single VPlan. This simplifies the code and was suggested by @nikolaypanchenko in https://github.com/llvm/llvm-project/pull/84464.
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp132
-rw-r--r--llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h35
2 files changed, 83 insertions, 84 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 52b992b..2163930 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7898,8 +7898,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
}
}
-VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
- VPlan &Plan) {
+VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
// Look for cached value.
@@ -7954,7 +7953,7 @@ VPValue *VPRecipeBuilder::getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const {
return ECEntryIt->second;
}
-void VPRecipeBuilder::createHeaderMask(VPlan &Plan) {
+void VPRecipeBuilder::createHeaderMask() {
BasicBlock *Header = OrigLoop->getHeader();
// When not folding the tail, use nullptr to model all-true mask.
@@ -7989,7 +7988,7 @@ VPValue *VPRecipeBuilder::getBlockInMask(BasicBlock *BB) const {
return BCEntryIt->second;
}
-void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
+void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
assert(BlockMaskCache.count(BB) == 0 && "Mask for block already computed");
assert(OrigLoop->getHeader() != BB &&
@@ -8000,7 +7999,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
VPValue *BlockMask = nullptr;
// This is the block mask. We OR all incoming edges.
for (auto *Predecessor : predecessors(BB)) {
- VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan);
+ VPValue *EdgeMask = createEdgeMask(Predecessor, BB);
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too.
BlockMaskCache[BB] = EdgeMask;
return;
@@ -8019,7 +8018,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
VPWidenMemoryInstructionRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
- VFRange &Range, VPlanPtr &Plan) {
+ VFRange &Range) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
"Must be called with either a load or store");
@@ -8092,7 +8091,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
}
VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
- PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
+ PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) {
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
@@ -8116,7 +8115,7 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
}
VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
- TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan) {
+ TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range) {
// Optimize the special case where the source is a constant integer
// induction variable. Notice that we can only optimize the 'trunc' case
// because (a) FP conversions lose precision, (b) sext/zext may wrap, and
@@ -8144,8 +8143,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
}
VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
- ArrayRef<VPValue *> Operands,
- VPlanPtr &Plan) {
+ ArrayRef<VPValue *> Operands) {
unsigned NumIncoming = Phi->getNumIncomingValues();
// We know that all PHIs in non-header blocks are converted into selects, so
@@ -8158,7 +8156,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
for (unsigned In = 0; In < NumIncoming; In++) {
OperandsWithMask.push_back(Operands[In]);
VPValue *EdgeMask =
- createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan);
+ createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent());
if (!EdgeMask) {
assert(In == 0 && "Both null and non-null edge masks found");
assert(all_equal(Operands) &&
@@ -8172,8 +8170,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
ArrayRef<VPValue *> Operands,
- VFRange &Range,
- VPlanPtr &Plan) {
+ VFRange &Range) {
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[this, CI](ElementCount VF) {
return CM.isScalarWithPredication(CI, VF);
@@ -8248,7 +8245,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
if (Legal->isMaskRequired(CI))
Mask = getBlockInMask(CI->getParent());
else
- Mask = Plan->getVPValueOrAddLiveIn(ConstantInt::getTrue(
+ Mask = Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue(
IntegerType::getInt1Ty(Variant->getFunctionType()->getContext())));
Ops.insert(Ops.begin() + *MaskPos, Mask);
@@ -8278,7 +8275,7 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
ArrayRef<VPValue *> Operands,
- VPBasicBlock *VPBB, VPlanPtr &Plan) {
+ VPBasicBlock *VPBB) {
switch (I->getOpcode()) {
default:
return nullptr;
@@ -8291,8 +8288,8 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
if (CM.isPredicatedInst(I)) {
SmallVector<VPValue *> Ops(Operands.begin(), Operands.end());
VPValue *Mask = getBlockInMask(I->getParent());
- VPValue *One = Plan->getVPValueOrAddLiveIn(
- ConstantInt::get(I->getType(), 1u, false));
+ VPValue *One =
+ Plan.getVPValueOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
auto *SafeRHS =
new VPInstruction(Instruction::Select, {Mask, Ops[1], One},
I->getDebugLoc());
@@ -8336,8 +8333,7 @@ void VPRecipeBuilder::fixHeaderPhis() {
}
VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
- VFRange &Range,
- VPlan &Plan) {
+ VFRange &Range) {
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
@@ -8392,21 +8388,22 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
return Recipe;
}
-VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
- Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range,
- VPBasicBlock *VPBB, VPlanPtr &Plan) {
+VPRecipeBase *
+VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
+ ArrayRef<VPValue *> Operands,
+ VFRange &Range, VPBasicBlock *VPBB) {
// First, check for specific widening recipes that deal with inductions, Phi
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
if (auto Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
- return tryToBlend(Phi, Operands, Plan);
+ return tryToBlend(Phi, Operands);
// Always record recipes for header phis. Later first-order recurrence phis
// can have earlier phis as incoming values.
recordRecipeOf(Phi);
- if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
+ if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
return Recipe;
VPHeaderPHIRecipe *PhiRecipe = nullptr;
@@ -8442,9 +8439,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
return PhiRecipe;
}
- if (isa<TruncInst>(Instr) &&
- (Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands,
- Range, *Plan)))
+ if (isa<TruncInst>(Instr) && (Recipe = tryToOptimizeInductionTruncate(
+ cast<TruncInst>(Instr), Operands, Range)))
return Recipe;
// All widen recipes below deal only with VF > 1.
@@ -8453,10 +8449,10 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
return nullptr;
if (auto *CI = dyn_cast<CallInst>(Instr))
- return tryToWidenCall(CI, Operands, Range, Plan);
+ return tryToWidenCall(CI, Operands, Range);
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
- return tryToWidenMemory(Instr, Operands, Range, Plan);
+ return tryToWidenMemory(Instr, Operands, Range);
if (!shouldWiden(Instr, Range))
return nullptr;
@@ -8475,7 +8471,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
*CI);
}
- return tryToWiden(Instr, Operands, VPBB, Plan);
+ return tryToWiden(Instr, Operands, VPBB);
}
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
@@ -8547,37 +8543,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
- VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder);
-
- // ---------------------------------------------------------------------------
- // Pre-construction: record ingredients whose recipes we'll need to further
- // process after constructing the initial VPlan.
- // ---------------------------------------------------------------------------
-
- // For each interleave group which is relevant for this (possibly trimmed)
- // Range, add it to the set of groups to be later applied to the VPlan and add
- // placeholders for its members' Recipes which we'll be replacing with a
- // single VPInterleaveRecipe.
- for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
- auto applyIG = [IG, this](ElementCount VF) -> bool {
- bool Result = (VF.isVector() && // Query is illegal for VF == 1
- CM.getWideningDecision(IG->getInsertPos(), VF) ==
- LoopVectorizationCostModel::CM_Interleave);
- // For scalable vectors, the only interleave factor currently supported
- // is 2 since we require the (de)interleave2 intrinsics instead of
- // shufflevectors.
- assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
- "Unsupported interleave factor for scalable vectors");
- return Result;
- };
- if (!getDecisionAndClampRange(applyIG, Range))
- continue;
- InterleaveGroups.insert(IG);
- for (unsigned i = 0; i < IG->getFactor(); i++)
- if (Instruction *Member = IG->getMember(i))
- RecipeBuilder.recordRecipeOf(Member);
- };
-
// ---------------------------------------------------------------------------
// Build initial VPlan: Scan the body of the loop in a topological order to
// visit each basic block after having visited its predecessor basic blocks.
@@ -8612,6 +8577,41 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
bool HasNUW = Style == TailFoldingStyle::None;
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
+ VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
+
+ // ---------------------------------------------------------------------------
+ // Pre-construction: record ingredients whose recipes we'll need to further
+ // process after constructing the initial VPlan.
+ // ---------------------------------------------------------------------------
+
+ // For each interleave group which is relevant for this (possibly trimmed)
+ // Range, add it to the set of groups to be later applied to the VPlan and add
+ // placeholders for its members' Recipes which we'll be replacing with a
+ // single VPInterleaveRecipe.
+ for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
+ auto applyIG = [IG, this](ElementCount VF) -> bool {
+ bool Result = (VF.isVector() && // Query is illegal for VF == 1
+ CM.getWideningDecision(IG->getInsertPos(), VF) ==
+ LoopVectorizationCostModel::CM_Interleave);
+ // For scalable vectors, the only interleave factor currently supported
+ // is 2 since we require the (de)interleave2 intrinsics instead of
+ // shufflevectors.
+ assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
+ "Unsupported interleave factor for scalable vectors");
+ return Result;
+ };
+ if (!getDecisionAndClampRange(applyIG, Range))
+ continue;
+ InterleaveGroups.insert(IG);
+ for (unsigned i = 0; i < IG->getFactor(); i++)
+ if (Instruction *Member = IG->getMember(i))
+ RecipeBuilder.recordRecipeOf(Member);
+ };
+
+ // ---------------------------------------------------------------------------
+ // Construct recipes for the instructions in the loop
+ // ---------------------------------------------------------------------------
+
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
LoopBlocksDFS DFS(OrigLoop);
@@ -8633,9 +8633,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Builder.setInsertPoint(VPBB);
if (VPBB == HeaderVPBB)
- RecipeBuilder.createHeaderMask(*Plan);
+ RecipeBuilder.createHeaderMask();
else if (NeedsMasks)
- RecipeBuilder.createBlockInMask(BB, *Plan);
+ RecipeBuilder.createBlockInMask(BB);
// Introduce each ingredient into VPlan.
// TODO: Model and preserve debug intrinsics in VPlan.
@@ -8658,10 +8658,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
continue;
- VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe(
- Instr, Operands, Range, VPBB, Plan);
+ VPRecipeBase *Recipe =
+ RecipeBuilder.tryToCreateWidenRecipe(Instr, Operands, Range, VPBB);
if (!Recipe)
- Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan);
+ Recipe = RecipeBuilder.handleReplication(Instr, Range);
for (auto *Def : Recipe->definedValues()) {
auto *UV = Def->getUnderlyingValue();
Plan->addVPValue(UV, Def);
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index b149802..29a395c 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -23,6 +23,9 @@ class TargetLibraryInfo;
/// Helper class to create VPRecipies from IR instructions.
class VPRecipeBuilder {
+ /// The VPlan new recipes are added to.
+ VPlan &Plan;
+
/// The loop that we evaluate.
Loop *OrigLoop;
@@ -69,53 +72,50 @@ class VPRecipeBuilder {
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
- VFRange &Range,
- VPlanPtr &Plan);
+ VFRange &Range);
/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.
VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi,
ArrayRef<VPValue *> Operands,
- VPlan &Plan, VFRange &Range);
+ VFRange &Range);
/// Optimize the special case where the operand of \p I is a constant integer
/// induction variable.
VPWidenIntOrFpInductionRecipe *
tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
- VFRange &Range, VPlan &Plan);
+ VFRange &Range);
/// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently
/// all such phi nodes are turned into a sequence of select instructions as
/// the vectorizer currently performs full if-conversion.
- VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
- VPlanPtr &Plan);
+ VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);
/// Handle call instructions. If \p CI can be widened for \p Range.Start,
/// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
/// decision from \p Range.Start to \p Range.End.
VPWidenCallRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
- VFRange &Range, VPlanPtr &Plan);
+ VFRange &Range);
/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
/// if it can. The function should only be called if the cost-model indicates
/// that widening should be performed.
VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
- VPBasicBlock *VPBB, VPlanPtr &Plan);
+ VPBasicBlock *VPBB);
public:
- VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
+ VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM,
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
- : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE),
- Builder(Builder) {}
+ : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM),
+ PSE(PSE), Builder(Builder) {}
/// Create and return a widened recipe for \p I if one can be created within
/// the given VF \p Range.
VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr,
ArrayRef<VPValue *> Operands,
- VFRange &Range, VPBasicBlock *VPBB,
- VPlanPtr &Plan);
+ VFRange &Range, VPBasicBlock *VPBB);
/// Set the recipe created for given ingredient. This operation is a no-op for
/// ingredients that were not marked using a nullptr entry in the map.
@@ -128,19 +128,19 @@ public:
}
/// Create the mask for the vector loop header block.
- void createHeaderMask(VPlan &Plan);
+ void createHeaderMask();
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True or the loop mask when
/// tail folding.
- void createBlockInMask(BasicBlock *BB, VPlan &Plan);
+ void createBlockInMask(BasicBlock *BB);
/// Returns the *entry* mask for the block \p BB.
VPValue *getBlockInMask(BasicBlock *BB) const;
/// A helper function that computes the predicate of the edge between SRC
/// and DST.
- VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlan &Plan);
+ VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
/// A helper that returns the previously computed predicate of the edge
/// between SRC and DST.
@@ -166,8 +166,7 @@ public:
/// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as
/// last operand. Range.End may be decreased to ensure same recipe behavior
/// from \p Range.Start to \p Range.End.
- VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range,
- VPlan &Plan);
+ VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range);
/// Add the incoming values from the backedge to reduction & first-order
/// recurrence cross-iteration phis.