aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Hahn <flo@fhahn.com>2024-06-21 19:54:21 +0100
committerFlorian Hahn <flo@fhahn.com>2024-06-21 19:54:21 +0100
commitf1f3c34b4770437bdb022737918603b4bbeb523e (patch)
treebf1ef2af10fca10b33d822b4fb74f25dc7c997ac
parent39048b69b85e530b9b8a4226d9043a0bd340fe8a (diff)
downloadllvm-f1f3c34b4770437bdb022737918603b4bbeb523e.zip
llvm-f1f3c34b4770437bdb022737918603b4bbeb523e.tar.gz
llvm-f1f3c34b4770437bdb022737918603b4bbeb523e.tar.bz2
Revert "Recommit "[VPlan] First step towards VPlan cost modeling. (#92555)""
This reverts commit 242cc200ccb24e22eaf54aed7b0b0c84cfc54c0b and eea150c84053035163f307b46549a2997a343ce9, as it is causing a build bot failure and there have been a number of crashes reported at https://github.com/llvm/llvm-project/pull/92555
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h17
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp229
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp89
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h67
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp35
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp5
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanValue.h3
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll2
-rw-r--r--llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll77
-rw-r--r--llvm/test/Transforms/LoopVectorize/WebAssembly/lit.local.cfg2
10 files changed, 28 insertions, 498 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 6011e16..c03c278 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -344,16 +344,6 @@ class LoopVectorizationPlanner {
/// A builder used to construct the current plan.
VPBuilder Builder;
- /// Computes the cost of \p Plan for vectorization factor \p VF.
- ///
- /// The current implementation requires access to the
- /// LoopVectorizationLegality to handle inductions and reductions, which is
- /// why it is kept separate from the VPlan-only cost infrastructure.
- ///
- /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
- /// been retired.
- InstructionCost cost(VPlan &Plan, ElementCount VF) const;
-
public:
LoopVectorizationPlanner(
Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
@@ -375,9 +365,6 @@ public:
/// Return the best VPlan for \p VF.
VPlan &getBestPlanFor(ElementCount VF) const;
- /// Return the most profitable plan and fix its VF to the most profitable one.
- VPlan &getBestPlan() const;
-
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
///
@@ -456,9 +443,7 @@ private:
ElementCount MinVF);
/// \return The most profitable vectorization factor and the cost of that VF.
- /// This method checks every VF in \p CandidateVFs. This is now only used to
- /// verify the decisions by the new VPlan-based cost-model and will be retired
- /// once the VPlan-based cost-model is stabilized.
+ /// This method checks every VF in \p CandidateVFs.
VectorizationFactor
selectVectorizationFactor(const ElementCountSet &CandidateVFs);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 63e7c55..9571cfe 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -290,7 +290,7 @@ static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor(
cl::desc("A flag that overrides the target's max interleave factor for "
"vectorized loops."));
-cl::opt<unsigned> ForceTargetInstructionCost(
+static cl::opt<unsigned> ForceTargetInstructionCost(
"force-target-instruction-cost", cl::init(0), cl::Hidden,
cl::desc("A flag that overrides the target's expected cost for "
"an instruction to a single constant value. Mostly "
@@ -412,6 +412,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
}
+/// A helper function that returns the reciprocal of the block probability of
+/// predicated blocks. If we return X, we are assuming the predicated block
+/// will execute once for every X iterations of the loop header.
+///
+/// TODO: We should use actual block probability here, if available. Currently,
+/// we always assume predicated blocks have a 50% chance of executing.
+static unsigned getReciprocalPredBlockProb() { return 2; }
+
/// Returns "best known" trip count for the specified loop \p L as defined by
/// the following procedure:
/// 1) Returns exact trip count if it is known.
@@ -1613,16 +1621,6 @@ public:
/// \p VF is the vectorization factor chosen for the original loop.
bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
- /// Return the cost of instructions in an inloop reduction pattern, if I is
- /// part of that pattern.
- std::optional<InstructionCost>
- getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
- TTI::TargetCostKind CostKind) const;
-
- /// Returns the execution time cost of an instruction for a given vector
- /// width. Vector width of one means scalar.
- VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
-
private:
unsigned NumPredStores = 0;
@@ -1648,11 +1646,21 @@ private:
/// of elements.
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
+ /// Returns the execution time cost of an instruction for a given vector
+ /// width. Vector width of one means scalar.
+ VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
+
/// The cost-computation logic from getInstructionCost which provides
/// the vector type as an output parameter.
InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
Type *&VectorTy);
+ /// Return the cost of instructions in an inloop reduction pattern, if I is
+ /// part of that pattern.
+ std::optional<InstructionCost>
+ getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
+ TTI::TargetCostKind CostKind) const;
+
/// Calculate vectorization cost of memory instruction \p I.
InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF);
@@ -7289,10 +7297,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
if (!MaxFactors.hasVector())
return VectorizationFactor::Disabled();
- // Select the optimal vectorization factor according to the legacy cost-model.
- // This is now only used to verify the decisions by the new VPlan-based
- // cost-model and will be retired once the VPlan-based cost-model is
- // stabilized.
+ // Select the optimal vectorization factor.
VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
if (!hasPlanWithVF(VF.Width)) {
@@ -7303,189 +7308,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VF;
}
-InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
- ElementCount VF) const {
- return CM.getInstructionCost(UI, VF).first;
-}
-
-bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
- return (IsVector && CM.VecValuesToIgnore.contains(UI)) ||
- SkipCostComputation.contains(UI);
-}
-
-InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
- ElementCount VF) const {
- InstructionCost Cost = 0;
- LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
- VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);
-
- // Cost modeling for inductions is inaccurate in the legacy cost model
- // compared to the recipes that are generated. To match here initially during
- // VPlan cost model bring up directly use the induction costs from the legacy
- // cost model. Note that we do this as pre-processing; the VPlan may not have
- // any recipes associated with the original induction increment instruction
- // and may replace truncates with VPWidenIntOrFpInductionRecipe. We precompute
- // the cost of induction phis and increments (both that are represented by
- // recipes and those that are not), to avoid distinguishing between them here,
- // and skip all recipes that represent induction phis and increments (the
- // former case) later on, if they exist, to avoid counting them twice.
- // Similarly we pre-compute the cost of any optimized truncates.
- // TODO: Switch to more accurate costing based on VPlan.
- for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
- Instruction *IVInc = cast<Instruction>(
- IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
- SmallVector<Instruction *> IVInsts = {IV, IVInc};
- for (User *U : IV->users()) {
- auto *CI = cast<Instruction>(U);
- if (!CostCtx.CM.isOptimizableIVTruncate(CI, VF))
- continue;
- IVInsts.push_back(CI);
- }
- for (Instruction *IVInst : IVInsts) {
- if (!CostCtx.SkipCostComputation.insert(IVInst).second)
- continue;
- InstructionCost InductionCost = CostCtx.getLegacyCost(IVInst, VF);
- LLVM_DEBUG({
- dbgs() << "Cost of " << InductionCost << " for VF " << VF
- << ": induction instruction " << *IVInst << "\n";
- });
- Cost += InductionCost;
- }
- }
-
- /// Compute the cost of all exiting conditions of the loop using the legacy
- /// cost model. This is to match the legacy behavior, which adds the cost of
- /// all exit conditions. Note that this over-estimates the cost, as there will
- /// be a single condition to control the vector loop.
- SmallVector<BasicBlock *> Exiting;
- CM.TheLoop->getExitingBlocks(Exiting);
- SetVector<Instruction *> ExitInstrs;
- // Collect all exit conditions.
- for (BasicBlock *EB : Exiting) {
- auto *Term = dyn_cast<BranchInst>(EB->getTerminator());
- if (!Term)
- continue;
- if (auto *CondI = dyn_cast<Instruction>(Term->getOperand(0))) {
- ExitInstrs.insert(CondI);
- }
- }
- // Compute the cost of all instructions only feeding the exit conditions.
- for (unsigned I = 0; I != ExitInstrs.size(); ++I) {
- Instruction *CondI = ExitInstrs[I];
- if (!OrigLoop->contains(CondI) ||
- !CostCtx.SkipCostComputation.insert(CondI).second)
- continue;
- Cost += CostCtx.getLegacyCost(CondI, VF);
- for (Value *Op : CondI->operands()) {
- auto *OpI = dyn_cast<Instruction>(Op);
- if (!OpI || any_of(OpI->users(), [&ExitInstrs](User *U) {
- return !ExitInstrs.contains(cast<Instruction>(U));
- }))
- continue;
- ExitInstrs.insert(OpI);
- }
- }
-
- // The legacy cost model has special logic to compute the cost of in-loop
- // reductions, which may be smaller than the sum of all instructions involved
- // in the reduction. For AnyOf reductions, VPlan codegen may remove the select
- // which the legacy cost model uses to assign cost. Pre-compute their costs
- // for now.
- // TODO: Switch to costing based on VPlan once the logic has been ported.
- for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) {
- if (!CM.isInLoopReduction(RedPhi) &&
- !RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind()))
- continue;
-
- // AnyOf reduction codegen may remove the select. To match the legacy cost
- // model, pre-compute the cost for AnyOf reductions here.
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
- auto *Select = cast<SelectInst>(*find_if(
- RedPhi->users(), [](User *U) { return isa<SelectInst>(U); }));
- assert(!CostCtx.SkipCostComputation.contains(Select) &&
- "reduction op visited multiple times");
- CostCtx.SkipCostComputation.insert(Select);
- auto ReductionCost = CostCtx.getLegacyCost(Select, VF);
- LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
- << ":\n any-of reduction " << *Select << "\n");
- Cost += ReductionCost;
- continue;
- }
-
- const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop);
- SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(),
- ChainOps.end());
- // Also include the operands of instructions in the chain, as the cost-model
- // may mark extends as free.
- for (auto *ChainOp : ChainOps) {
- for (Value *Op : ChainOp->operands()) {
- if (auto *I = dyn_cast<Instruction>(Op))
- ChainOpsAndOperands.insert(I);
- }
- }
-
- // Pre-compute the cost for I, if it has a reduction pattern cost.
- for (Instruction *I : ChainOpsAndOperands) {
- auto ReductionCost = CM.getReductionPatternCost(
- I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput);
- if (!ReductionCost)
- continue;
-
- assert(!CostCtx.SkipCostComputation.contains(I) &&
- "reduction op visited multiple times");
- CostCtx.SkipCostComputation.insert(I);
- LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
- << ":\n in-loop reduction " << *I << "\n");
- Cost += *ReductionCost;
- }
- }
-
- // Now compute and add the VPlan-based cost.
- Cost += Plan.cost(VF, CostCtx);
- LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n");
- return Cost;
-}
-
-VPlan &LoopVectorizationPlanner::getBestPlan() const {
- // If there is a single VPlan with a single VF, return it directly.
- VPlan &FirstPlan = *VPlans[0];
- if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
- return FirstPlan;
-
- VPlan *BestPlan = &FirstPlan;
- ElementCount ScalarVF = ElementCount::getFixed(1);
- assert(hasPlanWithVF(ScalarVF) &&
- "More than a single plan/VF w/o any plan having scalar VF");
-
- InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF);
- VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost);
-
- bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
- if (ForceVectorization) {
- // Ignore scalar width, because the user explicitly wants vectorization.
- // Initialize cost to max so that VF = 2 is, at least, chosen during cost
- // evaluation.
- BestFactor.Cost = InstructionCost::getMax();
- }
-
- for (auto &P : VPlans) {
- for (ElementCount VF : P->vectorFactors()) {
- if (VF.isScalar())
- continue;
- InstructionCost Cost = cost(*P, VF);
- VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
- if (isMoreProfitable(CurrentFactor, BestFactor)) {
- BestFactor = CurrentFactor;
- BestPlan = &*P;
- }
- }
- }
- BestPlan->setVF(BestFactor.Width);
- return *BestPlan;
-}
-
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
assert(count_if(VPlans,
[VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
@@ -10344,15 +10166,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);
- VPlan &BestPlan = LVP.getBestPlan();
- assert(size(BestPlan.vectorFactors()) == 1 &&
- "Plan should have a single VF");
- ElementCount Width = *BestPlan.vectorFactors().begin();
- LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
- << "\n");
- assert(VF.Width == Width &&
- "VPlan cost model and legacy cost model disagreed");
- LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
+ LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index fe6de48..5070950 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -752,72 +752,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
State->Instance.reset();
}
-InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
- InstructionCost Cost = 0;
- for (VPRecipeBase &R : Recipes)
- Cost += R.cost(VF, Ctx);
- return Cost;
-}
-
-InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
- if (!isReplicator()) {
- InstructionCost Cost = 0;
- for (VPBlockBase *Block : vp_depth_first_shallow(getEntry()))
- Cost += Block->cost(VF, Ctx);
- InstructionCost BackedgeCost =
- Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
- LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
- << ": vector loop backedge\n");
- Cost += BackedgeCost;
- return Cost;
- }
-
- // Compute the cost of a replicate region. Replicating isn't supported for
- // scalable vectors, return an invalid cost for them.
- // TODO: Discard scalable VPlans with replicate recipes earlier after
- // construction.
- if (VF.isScalable())
- return InstructionCost::getInvalid();
-
- // First compute the cost of the conditionally executed recipes, followed by
- // account for the branching cost, except if the mask is a header mask or
- // uniform condition.
- using namespace llvm::VPlanPatternMatch;
- VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
- InstructionCost ThenCost = Then->cost(VF, Ctx);
-
- // Note the cost estimates below closely match the current legacy cost model.
- auto *BOM = cast<VPBranchOnMaskRecipe>(&getEntryBasicBlock()->front());
- VPValue *Cond = BOM->getOperand(0);
-
- // Check if Cond is a header mask and don't account for branching costs as the
- // header mask will always be true except in the last iteration.
- if (vputils::isHeaderMask(Cond, *getPlan()))
- return ThenCost;
-
- // For the scalar case, we may not always execute the original predicated
- // block, Thus, scale the block's cost by the probability of executing it.
- if (VF.isScalar())
- return ThenCost / getReciprocalPredBlockProb();
-
- // Check if Cond is a uniform compare and don't account for branching costs as
- // a uniform condition corresponds to a single branch per VF.
- if (vputils::isUniformBoolean(Cond))
- return ThenCost;
-
- // Add the cost for branches around scalarized and predicated blocks.
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
- auto *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(Ctx.LLVMCtx), VF);
- auto FixedVF = VF.getFixedValue(); // Known to be non scalable.
- InstructionCost Cost = ThenCost;
- Cost += Ctx.TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnes(FixedVF),
- /*Insert*/ false, /*Extract*/ true,
- CostKind);
- Cost += Ctx.TTI.getCFInstrCost(Instruction::Br, CostKind) * FixedVF;
- return Cost;
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1007,12 +941,6 @@ void VPlan::execute(VPTransformState *State) {
"DT not preserved correctly");
}
-InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
- // For now only return the cost of the vector loop region, ignoring any other
- // blocks, like the preheader or middle blocks.
- return getVectorLoopRegion()->cost(VF, Ctx);
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPlan::printLiveIns(raw_ostream &O) const {
VPSlotTracker SlotTracker(this);
@@ -1555,8 +1483,7 @@ bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) {
auto IsWideCanonicalIV = [](VPValue *A) {
return isa<VPWidenCanonicalIVRecipe>(A) ||
(isa<VPWidenIntOrFpInductionRecipe>(A) &&
- cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical()) ||
- match(A, m_ScalarIVSteps(m_CanonicalIV(), m_SpecificInt(1)));
+ cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
};
VPValue *A, *B;
@@ -1568,17 +1495,3 @@ bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) {
return match(V, m_Binary<Instruction::ICmp>(m_VPValue(A), m_VPValue(B))) &&
IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount();
}
-
-bool vputils::isUniformBoolean(VPValue *Cond) {
- if (match(Cond, m_Not(m_VPValue())))
- Cond = Cond->getDefiningRecipe()->getOperand(0);
- auto *R = Cond->getDefiningRecipe();
- if (!R)
- return true;
- // TODO: match additional patterns preserving uniformity of booleans, e.g.,
- // AND/OR/etc.
- return match(R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) &&
- all_of(R->operands(), [](VPValue *Op) {
- return vputils::isUniformAfterVectorization(Op);
- });
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6a51023..fc25ed9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -42,7 +42,6 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Support/InstructionCost.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -65,11 +64,8 @@ class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
-class LoopVectorizationCostModel;
class LoopVersioning;
-struct VPCostContext;
-
namespace Intrinsic {
typedef unsigned ID;
}
@@ -86,14 +82,6 @@ Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
Loop *CurLoop = nullptr);
-/// A helper function that returns the reciprocal of the block probability of
-/// predicated blocks. If we return X, we are assuming the predicated block
-/// will execute once for every X iterations of the loop header.
-///
-/// TODO: We should use actual block probability here, if available. Currently,
-/// we always assume predicated blocks have a 50% chance of executing.
-inline unsigned getReciprocalPredBlockProb() { return 2; }
-
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
@@ -636,9 +624,6 @@ public:
/// VPBlockBase, thereby "executing" the VPlan.
virtual void execute(VPTransformState *State) = 0;
- /// Return the cost of the block.
- virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
-
/// Delete all blocks reachable from a given VPBlockBase, inclusive.
static void deleteCFG(VPBlockBase *Entry);
@@ -722,27 +707,6 @@ public:
#endif
};
-/// Struct to hold various analysis needed for cost computations.
-struct VPCostContext {
- const TargetTransformInfo &TTI;
- VPTypeAnalysis Types;
- LLVMContext &LLVMCtx;
- LoopVectorizationCostModel &CM;
- SmallPtrSet<Instruction *, 8> SkipCostComputation;
-
- VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,
- LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
- : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
-
- /// Return the cost for \p UI with \p VF using the legacy cost model as
- /// fallback until computing the cost of all recipes migrates to VPlan.
- InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;
-
- /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
- /// has already been pre-computed.
- bool skipCostComputation(Instruction *UI, bool IsVector) const;
-};
-
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
@@ -782,11 +746,6 @@ public:
/// this VPRecipe, thereby "executing" the VPlan.
virtual void execute(VPTransformState &State) = 0;
- /// Return the cost of this recipe, taking into account if the cost
- /// computation should be skipped and the ForceTargetInstructionCost flag.
- /// Also takes care of printing the cost for debugging.
- virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
-
/// Insert an unlinked recipe into a basic block immediately before
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
@@ -847,11 +806,6 @@ public:
/// Returns the debug location of the recipe.
DebugLoc getDebugLoc() const { return DL; }
-
-protected:
- /// Compute the cost of this recipe using the legacy cost model and the
- /// underlying instructions.
- InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
};
// Helper macro to define common classof implementations for recipes.
@@ -1427,6 +1381,8 @@ public:
ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
+ assert(UI.getType() == ResultTy &&
+ "result type of underlying cast doesn't match");
}
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
@@ -2140,8 +2096,6 @@ public:
"Op must be an operand of the recipe");
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
}
-
- Instruction *getInsertPos() const { return IG->getInsertPos(); }
};
/// A recipe to represent inloop reduction operations, performing a reduction on
@@ -2956,9 +2910,6 @@ public:
/// this VPBasicBlock, thereby "executing" the VPlan.
void execute(VPTransformState *State) override;
- /// Return the cost of this VPBasicBlock.
- InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
-
/// Return the position of the first non-phi node recipe in the block.
iterator getFirstNonPhi();
@@ -3133,9 +3084,6 @@ public:
/// this VPRegionBlock, thereby "executing" the VPlan.
void execute(VPTransformState *State) override;
- // Return the cost of this region.
- InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
-
void dropAllReferences(VPValue *NewValue) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3255,9 +3203,6 @@ public:
/// Generate the IR code for this VPlan.
void execute(VPTransformState *State);
- /// Return the cost of this plan.
- InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
-
VPBasicBlock *getEntry() { return Entry; }
const VPBasicBlock *getEntry() const { return Entry; }
@@ -3301,11 +3246,6 @@ public:
return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
}
- iterator_range<SmallSetVector<ElementCount, 2>::iterator>
- vectorFactors() const {
- return {VFs.begin(), VFs.end()};
- }
-
bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
@@ -3729,9 +3669,6 @@ inline bool isUniformAfterVectorization(VPValue *VPV) {
/// Return true if \p V is a header mask in \p Plan.
bool isHeaderMask(VPValue *V, VPlan &Plan);
-/// Return true if \p Cond is a uniform boolean.
-bool isUniformBoolean(VPValue *Cond);
-
} // end namespace vputils
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 972d895..a3ff639 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -39,7 +39,6 @@ using VectorParts = SmallVector<Value *, 2>;
namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}
-extern cl::opt<unsigned> ForceTargetInstructionCost;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
@@ -256,40 +255,6 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
insertBefore(BB, I);
}
-InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
- if (auto *S = dyn_cast<VPSingleDefRecipe>(this)) {
- auto *UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
- if (UI && Ctx.skipCostComputation(UI, VF.isVector()))
- return 0;
- }
-
- InstructionCost RecipeCost = computeCost(VF, Ctx);
- if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
- RecipeCost.isValid())
- RecipeCost = InstructionCost(ForceTargetInstructionCost);
-
- LLVM_DEBUG({
- dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
- dump();
- });
- return RecipeCost;
-}
-
-InstructionCost VPRecipeBase::computeCost(ElementCount VF,
- VPCostContext &Ctx) const {
- // Compute the cost for the recipe falling back to the legacy cost model using
- // the underlying instruction. If there is no underlying instruction, returns
- // 0.
- Instruction *UI = nullptr;
- if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
- UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
- else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
- UI = IG->getInsertPos();
- else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
- UI = &WidenMem->getIngredient();
- return UI ? Ctx.getLegacyCost(UI, VF) : 0;
-}
-
FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
assert(OpType == OperationType::FPMathOp &&
"recipe doesn't have fast math flags");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d979ca3..e2b7b0c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -999,10 +999,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
: Instruction::ZExt;
auto *VPC =
new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
- if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
- // UnderlyingExt has distinct return type, used to retain legacy cost.
- VPC->setUnderlyingValue(UnderlyingExt);
- }
VPC->insertBefore(&R);
Trunc->replaceAllUsesWith(VPC);
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
@@ -1519,7 +1515,6 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
VPInstruction *New = Builder.createOverflowingOp(
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
- New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
CurRec = New;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index fa6a65f..8d945f6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -74,7 +74,8 @@ protected:
public:
/// Return the underlying Value attached to this VPValue.
- Value *getUnderlyingValue() const { return UnderlyingVal; }
+ Value *getUnderlyingValue() { return UnderlyingVal; }
+ const Value *getUnderlyingValue() const { return UnderlyingVal; }
/// An enumeration for keeping track of the concrete subclass of VPValue that
/// are actually instantiated.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 41879f3..b5aa96e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -119,7 +119,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK: LV: Interleaving disabled by the pass manager
; CHECK-NEXT: LV: Vectorizing: innermost loop.
@@ -261,7 +260,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK: LV: Interleaving disabled by the pass manager
; CHECK-NEXT: LV: Vectorizing: innermost loop.
diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll
deleted file mode 100644
index 785af15..0000000
--- a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -p loop-vectorize -S %s | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-f128:64-n32:64-S128-ni:1:10:20"
-target triple = "wasm32-unknown-emscripten"
-
-define void @induction_phi_and_branch_cost(ptr %end, ptr %start.1, ptr %start.2) #0 {
-; CHECK-LABEL: define void @induction_phi_and_branch_cost(
-; CHECK-SAME: ptr [[END:%.*]], ptr [[START_1:%.*]], ptr [[START_2:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i32
-; CHECK-NEXT: [[START_11:%.*]] = ptrtoint ptr [[START_1]] to i32
-; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[START_11]], [[END2]]
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[N_VEC]], -4
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1]], i32 [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[N_VEC]], -4
-; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[START_2]], i32 [[TMP4]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], -4
-; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -3
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START_1]], %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], %[[MIDDLE_BLOCK]] ], [ [[START_2]], %[[ENTRY]] ]
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i32 -4
-; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i32 -4
-; CHECK-NEXT: store i32 0, ptr [[PTR_IV_2]], align 4
-; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
-;
-entry:
- br label %loop
-
-loop:
- %ptr.iv = phi ptr [ %start.1, %entry ], [ %ptr.iv.next, %loop ]
- %ptr.iv.2 = phi ptr [ %start.2, %entry ], [ %ptr.iv.2.next, %loop ]
- %ptr.iv.next = getelementptr nusw i8, ptr %ptr.iv, i32 -4
- %ptr.iv.2.next = getelementptr i8, ptr %ptr.iv.2, i32 -4
- store i32 0, ptr %ptr.iv.2, align 4
- %ec = icmp eq ptr %ptr.iv, %end
- br i1 %ec, label %exit, label %loop
-
-exit:
- ret void
-}
-
-attributes #0 = { "target-features"="+simd128" }
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.
diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/WebAssembly/lit.local.cfg
deleted file mode 100644
index d5f39ab..0000000
--- a/llvm/test/Transforms/LoopVectorize/WebAssembly/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not "WebAssembly" in config.root.targets:
- config.unsupported = True