aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Hahn <flo@fhahn.com>2024-01-29 09:52:05 +0000
committerGitHub <noreply@github.com>2024-01-29 09:52:05 +0000
commit743946e8ef0d164cbaa3409d11b218e299ccd35e (patch)
tree56bb87b5499fc28254cc57cd06ad66f1df33bb69
parentce8032394fa4ff55c36d857e85241c1bd0cacc60 (diff)
downloadllvm-743946e8ef0d164cbaa3409d11b218e299ccd35e.zip
llvm-743946e8ef0d164cbaa3409d11b218e299ccd35e.tar.gz
llvm-743946e8ef0d164cbaa3409d11b218e299ccd35e.tar.bz2
[VPlan] Replace VPRecipeOrVPValue with VP2VP recipe simplification. (#76090)
Move simplification of VPBlendRecipes from early VPlan construction to VPlan-to-VPlan based recipe simplification. This simplifies initial construction. Note that some in-loop reduction tests are failing at the moment, due to the reduction predicate being created after the reduction recipe. I will provide a patch for that soon. PR: https://github.com/llvm/llvm-project/pull/76090
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp133
-rw-r--r--llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h50
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp13
3 files changed, 92 insertions, 104 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3aff0a0..3483e2c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8112,10 +8112,9 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
BlockMaskCache[BB] = BlockMask;
}
-VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
- ArrayRef<VPValue *> Operands,
- VFRange &Range,
- VPlanPtr &Plan) {
+VPWidenMemoryInstructionRecipe *
+VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
+ VFRange &Range, VPlanPtr &Plan) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
"Must be called with either a load or store");
@@ -8187,7 +8186,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc);
}
-VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
+VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
// Check if this is an integer or fp induction. If so, build the recipe that
@@ -8239,31 +8238,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
return nullptr;
}
-VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
- ArrayRef<VPValue *> Operands,
- VPlanPtr &Plan) {
- // If all incoming values are equal, the incoming VPValue can be used directly
- // instead of creating a new VPBlendRecipe.
- if (llvm::all_equal(Operands))
- return Operands[0];
-
+VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
+ ArrayRef<VPValue *> Operands,
+ VPlanPtr &Plan) {
unsigned NumIncoming = Phi->getNumIncomingValues();
- // For in-loop reductions, we do not need to create an additional select.
- VPValue *InLoopVal = nullptr;
- for (unsigned In = 0; In < NumIncoming; In++) {
- PHINode *PhiOp =
- dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue());
- if (PhiOp && CM.isInLoopReduction(PhiOp)) {
- assert(!InLoopVal && "Found more than one in-loop reduction!");
- InLoopVal = Operands[In];
- }
- }
-
- assert((!InLoopVal || NumIncoming == 2) &&
- "Found an in-loop reduction for PHI with unexpected number of "
- "incoming values");
- if (InLoopVal)
- return Operands[Operands[0] == InLoopVal ? 1 : 0];
// We know that all PHIs in non-header blocks are converted into selects, so
// we don't have to worry about the insertion order and we can just use the
@@ -8273,15 +8251,18 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
SmallVector<VPValue *, 2> OperandsWithMask;
for (unsigned In = 0; In < NumIncoming; In++) {
+ OperandsWithMask.push_back(Operands[In]);
VPValue *EdgeMask =
createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan);
- assert((EdgeMask || NumIncoming == 1) &&
- "Multiple predecessors with one having a full mask");
- OperandsWithMask.push_back(Operands[In]);
- if (EdgeMask)
- OperandsWithMask.push_back(EdgeMask);
+ if (!EdgeMask) {
+ assert(In == 0 && "Both null and non-null edge masks found");
+ assert(all_equal(Operands) &&
+ "Distinct incoming values with one having a full mask");
+ break;
+ }
+ OperandsWithMask.push_back(EdgeMask);
}
- return toVPRecipeResult(new VPBlendRecipe(Phi, OperandsWithMask));
+ return new VPBlendRecipe(Phi, OperandsWithMask);
}
VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
@@ -8390,9 +8371,9 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
Range);
}
-VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I,
- ArrayRef<VPValue *> Operands,
- VPBasicBlock *VPBB, VPlanPtr &Plan) {
+VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
+ ArrayRef<VPValue *> Operands,
+ VPBasicBlock *VPBB, VPlanPtr &Plan) {
switch (I->getOpcode()) {
default:
return nullptr;
@@ -8449,9 +8430,9 @@ void VPRecipeBuilder::fixHeaderPhis() {
}
}
-VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
- VFRange &Range,
- VPlan &Plan) {
+VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
+ VFRange &Range,
+ VPlan &Plan) {
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
@@ -8503,14 +8484,12 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()),
IsUniform, BlockInMask);
- return toVPRecipeResult(Recipe);
+ return Recipe;
}
-VPRecipeOrVPValueTy
-VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
- ArrayRef<VPValue *> Operands,
- VFRange &Range, VPBasicBlock *VPBB,
- VPlanPtr &Plan) {
+VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
+ Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range,
+ VPBasicBlock *VPBB, VPlanPtr &Plan) {
// First, check for specific widening recipes that deal with inductions, Phi
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
@@ -8523,7 +8502,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
recordRecipeOf(Phi);
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
- return toVPRecipeResult(Recipe);
+ return Recipe;
VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
@@ -8555,13 +8534,13 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
recordRecipeOf(Inc);
PhisToFix.push_back(PhiRecipe);
- return toVPRecipeResult(PhiRecipe);
+ return PhiRecipe;
}
if (isa<TruncInst>(Instr) &&
(Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands,
Range, *Plan)))
- return toVPRecipeResult(Recipe);
+ return Recipe;
// All widen recipes below deal only with VF > 1.
if (LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8569,29 +8548,29 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
return nullptr;
if (auto *CI = dyn_cast<CallInst>(Instr))
- return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan));
+ return tryToWidenCall(CI, Operands, Range, Plan);
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
- return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));
+ return tryToWidenMemory(Instr, Operands, Range, Plan);
if (!shouldWiden(Instr, Range))
return nullptr;
if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))
- return toVPRecipeResult(new VPWidenGEPRecipe(
- GEP, make_range(Operands.begin(), Operands.end())));
+ return new VPWidenGEPRecipe(GEP,
+ make_range(Operands.begin(), Operands.end()));
if (auto *SI = dyn_cast<SelectInst>(Instr)) {
- return toVPRecipeResult(new VPWidenSelectRecipe(
- *SI, make_range(Operands.begin(), Operands.end())));
+ return new VPWidenSelectRecipe(
+ *SI, make_range(Operands.begin(), Operands.end()));
}
if (auto *CI = dyn_cast<CastInst>(Instr)) {
- return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0],
- CI->getType(), *CI));
+ return new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(),
+ *CI);
}
- return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
+ return tryToWiden(Instr, Operands, VPBB, Plan);
}
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
@@ -8779,22 +8758,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
continue;
- auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
+ VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe(
Instr, Operands, Range, VPBB, Plan);
- if (!RecipeOrValue)
- RecipeOrValue = RecipeBuilder.handleReplication(Instr, Range, *Plan);
- // If Instr can be simplified to an existing VPValue, use it.
- if (isa<VPValue *>(RecipeOrValue)) {
- auto *VPV = cast<VPValue *>(RecipeOrValue);
- Plan->addVPValue(Instr, VPV);
- // If the re-used value is a recipe, register the recipe for the
- // instruction, in case the recipe for Instr needs to be recorded.
- if (VPRecipeBase *R = VPV->getDefiningRecipe())
- RecipeBuilder.setRecipe(Instr, R);
- continue;
- }
- // Otherwise, add the new recipe.
- VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue);
+ if (!Recipe)
+ Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan);
for (auto *Def : Recipe->definedValues()) {
auto *UV = Def->getUnderlyingValue();
Plan->addVPValue(UV, Def);
@@ -9041,7 +9008,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// the phi until LoopExitValue. We keep track of the previous item
// (PreviousLink) to tell which of the two operands of a Link will remain
// scalar and which will be reduced. For minmax by select(cmp), Link will be
- // the select instructions.
+ // the select instructions. Blend recipes of in-loop reduction phi's will
+ // get folded to their non-phi operand, as the reduction recipe handles the
+ // condition directly.
VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) {
Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
@@ -9072,6 +9041,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
VecOp = FMulRecipe;
} else {
+ auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink);
+ if (PhiR->isInLoop() && Blend) {
+ assert(Blend->getNumIncomingValues() == 2 &&
+ "Blend must have 2 incoming values");
+ if (Blend->getIncomingValue(0) == PhiR)
+ Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
+ else {
+ assert(Blend->getIncomingValue(1) == PhiR &&
+ "PhiR must be an operand of the blend");
+ Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
+ }
+ continue;
+ }
+
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
if (isa<VPWidenRecipe>(CurrentLink)) {
assert(isa<CmpInst>(CurrentLinkI) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 5645cfa1..b149802 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -21,8 +21,6 @@ class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class TargetLibraryInfo;
-using VPRecipeOrVPValueTy = PointerUnion<VPRecipeBase *, VPValue *>;
-
/// Helper class to create VPRecipies from IR instructions.
class VPRecipeBuilder {
/// The loop that we evaluate.
@@ -69,14 +67,16 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
- VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
- VFRange &Range, VPlanPtr &Plan);
+ VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
+ ArrayRef<VPValue *> Operands,
+ VFRange &Range,
+ VPlanPtr &Plan);
/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.
- VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi,
- ArrayRef<VPValue *> Operands,
- VPlan &Plan, VFRange &Range);
+ VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi,
+ ArrayRef<VPValue *> Operands,
+ VPlan &Plan, VFRange &Range);
/// Optimize the special case where the operand of \p I is a constant integer
/// induction variable.
@@ -84,12 +84,11 @@ class VPRecipeBuilder {
tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
VFRange &Range, VPlan &Plan);
- /// Handle non-loop phi nodes. Return a VPValue, if all incoming values match
- /// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned
- /// into a sequence of select instructions as the vectorizer currently
- /// performs full if-conversion.
- VPRecipeOrVPValueTy tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
- VPlanPtr &Plan);
+ /// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently
+ /// all such phi nodes are turned into a sequence of select instructions as
+ /// the vectorizer currently performs full if-conversion.
+ VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
+ VPlanPtr &Plan);
/// Handle call instructions. If \p CI can be widened for \p Range.Start,
/// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
@@ -100,11 +99,8 @@ class VPRecipeBuilder {
/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
/// if it can. The function should only be called if the cost-model indicates
/// that widening should be performed.
- VPRecipeBase *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
- VPBasicBlock *VPBB, VPlanPtr &Plan);
-
- /// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be used to force the use as VPRecipeBase* for recipe sub-types that also inherit from VPValue.
- VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; }
+ VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
+ VPBasicBlock *VPBB, VPlanPtr &Plan);
public:
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
@@ -114,14 +110,12 @@ public:
: OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE),
Builder(Builder) {}
- /// Check if an existing VPValue can be used for \p Instr or a recipe can be
- /// create for \p I withing the given VF \p Range. If an existing VPValue can
- /// be used or if a recipe can be created, return it. Otherwise return a
- /// VPRecipeOrVPValueTy with nullptr.
- VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr,
- ArrayRef<VPValue *> Operands,
- VFRange &Range, VPBasicBlock *VPBB,
- VPlanPtr &Plan);
+ /// Create and return a widened recipe for \p I if one can be created within
+ /// the given VF \p Range.
+ VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr,
+ ArrayRef<VPValue *> Operands,
+ VFRange &Range, VPBasicBlock *VPBB,
+ VPlanPtr &Plan);
/// Set the recipe created for given ingredient. This operation is a no-op for
/// ingredients that were not marked using a nullptr entry in the map.
@@ -172,8 +166,8 @@ public:
/// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as
/// last operand. Range.End may be decreased to ensure same recipe behavior
/// from \p Range.Start to \p Range.End.
- VPRecipeOrVPValueTy handleReplication(Instruction *I, VFRange &Range,
- VPlan &Plan);
+ VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range,
+ VPlan &Plan);
/// Add the incoming values from the backedge to reduction & first-order
/// recurrence cross-iteration phis.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b4d913d..5d7ac2b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -827,6 +827,17 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
/// Try to simplify recipe \p R.
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+ // Try to remove redundant blend recipes.
+ if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
+ VPValue *Inc0 = Blend->getIncomingValue(0);
+ for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
+ if (Inc0 != Blend->getIncomingValue(I))
+ return;
+ Blend->replaceAllUsesWith(Inc0);
+ Blend->eraseFromParent();
+ return;
+ }
+
switch (getOpcodeForRecipe(R)) {
case Instruction::Mul: {
VPValue *A = R.getOperand(0);
@@ -1031,8 +1042,8 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
removeRedundantCanonicalIVs(Plan);
removeRedundantInductionCasts(Plan);
- optimizeInductions(Plan, SE);
simplifyRecipes(Plan, SE.getContext());
+ optimizeInductions(Plan, SE);
removeDeadRecipes(Plan);
createAndOptimizeReplicateRegions(Plan);