aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp1
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp11
-rw-r--r--llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp7
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp47
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp14
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanHelpers.h19
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp18
8 files changed, 78 insertions, 41 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index bd74388..8e76b79 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -83,7 +83,6 @@
#include <cstdint>
#include <functional>
#include <limits>
-#include <map>
#include <memory>
#include <queue>
#include <string>
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 27ec6c6..5bc9c28 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -5627,8 +5627,15 @@ bool InstCombinerImpl::run() {
for (Use &U : I->uses()) {
User *User = U.getUser();
- if (User->isDroppable())
- continue;
+ if (User->isDroppable()) {
+ // Do not sink if there are dereferenceable assumes that would be
+ // removed.
+ auto II = dyn_cast<IntrinsicInst>(User);
+ if (II->getIntrinsicID() != Intrinsic::assume ||
+ !II->getOperandBundle("dereferenceable"))
+ continue;
+ }
+
if (NumUsers > MaxSinkNumUsers)
return std::nullopt;
diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
index a577f51..4a7144f 100644
--- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
+++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
@@ -78,11 +78,16 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) {
SmallVector<OperandBundleDef> KeptBundles;
unsigned NumBundles = Assume->getNumOperandBundles();
for (unsigned I = 0; I != NumBundles; ++I) {
- auto IsDead = [](OperandBundleUse Bundle) {
+ auto IsDead = [&](OperandBundleUse Bundle) {
// "ignore" operand bundles are always dead.
if (Bundle.getTagName() == "ignore")
return true;
+ // "dereferenceable" operand bundles are only dropped if requested
+ // (e.g., after loop vectorization has run).
+ if (Bundle.getTagName() == "dereferenceable")
+ return DropDereferenceable;
+
// Bundles without arguments do not affect any specific values.
// Always keep them for now.
if (Bundle.Inputs.empty())
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 666033b..566d6ea 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1232,6 +1232,30 @@ public:
/// Superset of instructions that return true for isScalarWithPredication.
bool isPredicatedInst(Instruction *I) const;
+ /// A helper function that returns how much we should divide the cost of a
+ /// predicated block by. Typically this is the reciprocal of the block
+ /// probability, i.e. if we return X we are assuming the predicated block will
+ /// execute once for every X iterations of the loop header so the block should
+ /// only contribute 1/X of its cost to the total cost calculation, but when
+ /// optimizing for code size it will just be 1 as code size costs don't depend
+ /// on execution probabilities.
+ ///
+ /// TODO: We should use actual block probability here, if available.
+ /// Currently, we always assume predicated blocks have a 50% chance of
+ /// executing, apart from blocks that are only predicated due to tail folding.
+ inline unsigned
+ getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind,
+ BasicBlock *BB) const {
+ // If a block wasn't originally predicated but was predicated due to
+ // e.g. tail folding, don't divide the cost. Tail folded loops may still be
+ // predicated in the final vector loop iteration, but for most loops that
+ // don't have low trip counts we can expect their probability to be close to
+ // zero.
+ if (!Legal->blockNeedsPredication(BB))
+ return 1;
+ return CostKind == TTI::TCK_CodeSize ? 1 : 2;
+ }
+
/// Return the costs for our two available strategies for lowering a
/// div/rem operation which requires speculating at least one lane.
/// First result is for scalarization (will be invalid for scalable
@@ -2887,7 +2911,8 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
// Scale the cost by the probability of executing the predicated blocks.
// This assumes the predicated block for each vector lane is equally
// likely.
- ScalarizationCost = ScalarizationCost / getPredBlockCostDivisor(CostKind);
+ ScalarizationCost =
+ ScalarizationCost / getPredBlockCostDivisor(CostKind, I->getParent());
}
InstructionCost SafeDivisorCost = 0;
@@ -5032,7 +5057,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
}
// Scale the total scalar cost by block probability.
- ScalarCost /= getPredBlockCostDivisor(CostKind);
+ ScalarCost /= getPredBlockCostDivisor(CostKind, I->getParent());
// Compute the discount. A non-negative discount means the vector version
// of the instruction costs more, and scalarizing would be beneficial.
@@ -5082,10 +5107,11 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// stores and instructions that may divide by zero) will now be
// unconditionally executed. For the scalar case, we may not always execute
// the predicated block, if it is an if-else block. Thus, scale the block's
- // cost by the probability of executing it. blockNeedsPredication from
- // Legal is used so as to not include all blocks in tail folded loops.
- if (VF.isScalar() && Legal->blockNeedsPredication(BB))
- BlockCost /= getPredBlockCostDivisor(CostKind);
+ // cost by the probability of executing it.
+ // getPredBlockCostDivisor will return 1 for blocks that are only predicated
+ // by the header mask when folding the tail.
+ if (VF.isScalar())
+ BlockCost /= getPredBlockCostDivisor(CostKind, BB);
Cost += BlockCost;
}
@@ -5164,7 +5190,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// conditional branches, but may not be executed for each vector lane. Scale
// the cost by the probability of executing the predicated block.
if (isPredicatedInst(I)) {
- Cost /= getPredBlockCostDivisor(CostKind);
+ Cost /= getPredBlockCostDivisor(CostKind, I->getParent());
// Add the cost of an i1 extract and a branch
auto *VecI1Ty =
@@ -6732,6 +6758,10 @@ bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
SkipCostComputation.contains(UI);
}
+unsigned VPCostContext::getPredBlockCostDivisor(BasicBlock *BB) const {
+ return CM.getPredBlockCostDivisor(CostKind, BB);
+}
+
InstructionCost
LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
VPCostContext &CostCtx) const {
@@ -8157,9 +8187,10 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
return new VPWidenSelectRecipe(*cast<SelectInst>(Instr), R->operands());
if (Instruction::isCast(VPI->getOpcode())) {
+ auto *CastR = cast<VPInstructionWithType>(R);
auto *CI = cast<CastInst>(Instr);
return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0),
- CI->getType(), *CI);
+ CastR->getResultType(), *CI);
}
return tryToWiden(VPI);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f405c40..663e31a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/MDBuilder.h"
#define DEBUG_TYPE "vplan"
@@ -233,10 +234,15 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
for (Value *Op : Inst->operands())
VPOperands.push_back(getOrCreateVPOperand(Op));
- // Build VPInstruction for any arbitrary Instruction without specific
- // representation in VPlan.
- NewR = cast<VPInstruction>(
- VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
+ if (auto *CI = dyn_cast<CastInst>(Inst)) {
+ NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
+ CI->getType(), CI->getDebugLoc());
+ NewR->setUnderlyingValue(CI);
+ } else {
+ // Build VPInstruction for any arbitrary Instruction without specific
+ // representation in VPlan.
+ NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst);
+ }
}
IRDef2VPValue[Inst] = NewR;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 965426f..caabfa7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -50,21 +50,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);
-/// A helper function that returns how much we should divide the cost of a
-/// predicated block by. Typically this is the reciprocal of the block
-/// probability, i.e. if we return X we are assuming the predicated block will
-/// execute once for every X iterations of the loop header so the block should
-/// only contribute 1/X of its cost to the total cost calculation, but when
-/// optimizing for code size it will just be 1 as code size costs don't depend
-/// on execution probabilities.
-///
-/// TODO: We should use actual block probability here, if available. Currently,
-/// we always assume predicated blocks have a 50% chance of executing.
-inline unsigned
-getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind) {
- return CostKind == TTI::TCK_CodeSize ? 1 : 2;
-}
-
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
@@ -367,6 +352,10 @@ struct VPCostContext {
/// has already been pre-computed.
bool skipCostComputation(Instruction *UI, bool IsVector) const;
+ /// \returns how much the cost of a predicated block should be divided by.
+ /// Forwards to LoopVectorizationCostModel::getPredBlockCostDivisor.
+ unsigned getPredBlockCostDivisor(BasicBlock *BB) const;
+
/// Returns the OperandInfo for \p V, if it is a live-in.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 80cd112..707886f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3349,7 +3349,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// Scale the cost by the probability of executing the predicated blocks.
// This assumes the predicated block for each vector lane is equally
// likely.
- ScalarCost /= getPredBlockCostDivisor(Ctx.CostKind);
+ ScalarCost /= Ctx.getPredBlockCostDivisor(UI->getParent());
return ScalarCost;
}
case Instruction::Load:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 634df51..b319fbc7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1745,17 +1745,17 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
if (match(Term, m_BranchOnCount()) ||
match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask(
m_VPValue(), m_VPValue(), m_VPValue()))))) {
- // Try to simplify the branch condition if TC <= VF * UF when the latch
- // terminator is BranchOnCount or BranchOnCond where the input is
- // Not(ActiveLaneMask).
- const SCEV *TripCount =
- vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
- assert(!isa<SCEVCouldNotCompute>(TripCount) &&
+ // Try to simplify the branch condition if VectorTC <= VF * UF when the
+ // latch terminator is BranchOnCount or BranchOnCond(Not(ActiveLaneMask)).
+ const SCEV *VectorTripCount =
+ vputils::getSCEVExprForVPValue(&Plan.getVectorTripCount(), SE);
+ if (isa<SCEVCouldNotCompute>(VectorTripCount))
+ VectorTripCount = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
+ assert(!isa<SCEVCouldNotCompute>(VectorTripCount) &&
"Trip count SCEV must be computable");
ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
- const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
- if (TripCount->isZero() ||
- !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
+ const SCEV *C = SE.getElementCount(VectorTripCount->getType(), NumElements);
+ if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, VectorTripCount, C))
return false;
} else if (match(Term, m_BranchOnCond(m_VPValue(Cond)))) {
// For BranchOnCond, check if we can prove the condition to be true using VF