aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp7
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp44
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp6
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h5
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanHelpers.h19
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp56
8 files changed, 96 insertions, 47 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index b5548d4..8c8d16a6 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1944,6 +1944,10 @@ void InstrLowerer::emitNameData() {
NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());
+ if (isGPUProfTarget(M)) {
+ NamesVar->setLinkage(GlobalValue::ExternalLinkage);
+ NamesVar->setVisibility(GlobalValue::ProtectedVisibility);
+ }
NamesSize = CompressedNameStr.size();
setGlobalVariableLargeSection(TT, *NamesVar);
diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
index a577f51..4a7144f 100644
--- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
+++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
@@ -78,11 +78,16 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) {
SmallVector<OperandBundleDef> KeptBundles;
unsigned NumBundles = Assume->getNumOperandBundles();
for (unsigned I = 0; I != NumBundles; ++I) {
- auto IsDead = [](OperandBundleUse Bundle) {
+ auto IsDead = [&](OperandBundleUse Bundle) {
// "ignore" operand bundles are always dead.
if (Bundle.getTagName() == "ignore")
return true;
+ // "dereferenceable" operand bundles are only dropped if requested
+ // (e.g., after loop vectorization has run).
+ if (Bundle.getTagName() == "dereferenceable")
+ return DropDereferenceable;
+
// Bundles without arguments do not affect any specific values.
// Always keep them for now.
if (Bundle.Inputs.empty())
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 45b5570..566d6ea 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1232,6 +1232,30 @@ public:
/// Superset of instructions that return true for isScalarWithPredication.
bool isPredicatedInst(Instruction *I) const;
+ /// A helper function that returns how much we should divide the cost of a
+ /// predicated block by. Typically this is the reciprocal of the block
+ /// probability, i.e. if we return X we are assuming the predicated block will
+ /// execute once for every X iterations of the loop header so the block should
+ /// only contribute 1/X of its cost to the total cost calculation, but when
+ /// optimizing for code size it will just be 1 as code size costs don't depend
+ /// on execution probabilities.
+ ///
+ /// TODO: We should use actual block probability here, if available.
+ /// Currently, we always assume predicated blocks have a 50% chance of
+ /// executing, apart from blocks that are only predicated due to tail folding.
+ inline unsigned
+ getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind,
+ BasicBlock *BB) const {
+ // If a block wasn't originally predicated but was predicated due to
+ // e.g. tail folding, don't divide the cost. Tail folded loops may still be
+ // predicated in the final vector loop iteration, but for most loops that
+ // don't have low trip counts we can expect their probability to be close to
+ // zero.
+ if (!Legal->blockNeedsPredication(BB))
+ return 1;
+ return CostKind == TTI::TCK_CodeSize ? 1 : 2;
+ }
+
/// Return the costs for our two available strategies for lowering a
/// div/rem operation which requires speculating at least one lane.
/// First result is for scalarization (will be invalid for scalable
@@ -2887,7 +2911,8 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
// Scale the cost by the probability of executing the predicated blocks.
// This assumes the predicated block for each vector lane is equally
// likely.
- ScalarizationCost = ScalarizationCost / getPredBlockCostDivisor(CostKind);
+ ScalarizationCost =
+ ScalarizationCost / getPredBlockCostDivisor(CostKind, I->getParent());
}
InstructionCost SafeDivisorCost = 0;
@@ -5032,7 +5057,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
}
// Scale the total scalar cost by block probability.
- ScalarCost /= getPredBlockCostDivisor(CostKind);
+ ScalarCost /= getPredBlockCostDivisor(CostKind, I->getParent());
// Compute the discount. A non-negative discount means the vector version
// of the instruction costs more, and scalarizing would be beneficial.
@@ -5082,10 +5107,11 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// stores and instructions that may divide by zero) will now be
// unconditionally executed. For the scalar case, we may not always execute
// the predicated block, if it is an if-else block. Thus, scale the block's
- // cost by the probability of executing it. blockNeedsPredication from
- // Legal is used so as to not include all blocks in tail folded loops.
- if (VF.isScalar() && Legal->blockNeedsPredication(BB))
- BlockCost /= getPredBlockCostDivisor(CostKind);
+ // cost by the probability of executing it.
+ // getPredBlockCostDivisor will return 1 for blocks that are only predicated
+ // by the header mask when folding the tail.
+ if (VF.isScalar())
+ BlockCost /= getPredBlockCostDivisor(CostKind, BB);
Cost += BlockCost;
}
@@ -5164,7 +5190,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// conditional branches, but may not be executed for each vector lane. Scale
// the cost by the probability of executing the predicated block.
if (isPredicatedInst(I)) {
- Cost /= getPredBlockCostDivisor(CostKind);
+ Cost /= getPredBlockCostDivisor(CostKind, I->getParent());
// Add the cost of an i1 extract and a branch
auto *VecI1Ty =
@@ -6732,6 +6758,10 @@ bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
SkipCostComputation.contains(UI);
}
+unsigned VPCostContext::getPredBlockCostDivisor(BasicBlock *BB) const {
+ return CM.getPredBlockCostDivisor(CostKind, BB);
+}
+
InstructionCost
LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
VPCostContext &CostCtx) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 5e4303a..90696ff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -99,20 +99,20 @@ VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
VPValue::~VPValue() {
assert(Users.empty() && "trying to delete a VPValue with remaining users");
- if (Def)
+ if (VPDef *Def = getDefiningRecipe())
Def->removeDefinedValue(this);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
- if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
+ if (const VPRecipeBase *R = getDefiningRecipe())
R->print(OS, "", SlotTracker);
else
printAsOperand(OS, SlotTracker);
}
void VPValue::dump() const {
- const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
+ const VPRecipeBase *Instr = getDefiningRecipe();
VPSlotTracker SlotTracker(
(Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
print(dbgs(), SlotTracker);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5851b3a..72858e1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -959,6 +959,11 @@ public:
/// Add metadata with kind \p Kind and \p Node.
void addMetadata(unsigned Kind, MDNode *Node) {
+ assert(none_of(Metadata,
+ [Kind](const std::pair<unsigned, MDNode *> &P) {
+ return P.first == Kind;
+ }) &&
+ "Kind must appear at most once in Metadata");
Metadata.emplace_back(Kind, Node);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 965426f..caabfa7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -50,21 +50,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);
-/// A helper function that returns how much we should divide the cost of a
-/// predicated block by. Typically this is the reciprocal of the block
-/// probability, i.e. if we return X we are assuming the predicated block will
-/// execute once for every X iterations of the loop header so the block should
-/// only contribute 1/X of its cost to the total cost calculation, but when
-/// optimizing for code size it will just be 1 as code size costs don't depend
-/// on execution probabilities.
-///
-/// TODO: We should use actual block probability here, if available. Currently,
-/// we always assume predicated blocks have a 50% chance of executing.
-inline unsigned
-getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind) {
- return CostKind == TTI::TCK_CodeSize ? 1 : 2;
-}
-
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
@@ -367,6 +352,10 @@ struct VPCostContext {
/// has already been pre-computed.
bool skipCostComputation(Instruction *UI, bool IsVector) const;
+ /// \returns how much the cost of a predicated block should be divided by.
+ /// Forwards to LoopVectorizationCostModel::getPredBlockCostDivisor.
+ unsigned getPredBlockCostDivisor(BasicBlock *BB) const;
+
/// Returns the OperandInfo for \p V, if it is a live-in.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 80cd112..707886f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3349,7 +3349,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// Scale the cost by the probability of executing the predicated blocks.
// This assumes the predicated block for each vector lane is equally
// likely.
- ScalarCost /= getPredBlockCostDivisor(Ctx.CostKind);
+ ScalarCost /= Ctx.getPredBlockCostDivisor(UI->getParent());
return ScalarCost;
}
case Instruction::Load:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 634df51..eab6426 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1420,10 +1420,26 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
// broadcasts.
if (!vputils::isSingleScalar(RepOrWidenR) ||
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
- return U->usesScalars(RepOrWidenR) ||
- match(cast<VPRecipeBase>(U),
- m_CombineOr(m_ExtractLastElement(m_VPValue()),
- m_ExtractLastLanePerPart(m_VPValue())));
+ if (auto *Store = dyn_cast<VPWidenStoreRecipe>(U)) {
+ // VPWidenStore doesn't have users, and stores are always
+ // profitable to widen: hence, permitting single-scalar stored
+ // values is an important leaf condition. The assert must hold as
+ // we checked the RepOrWidenR operand against
+ // vputils::isSingleScalar.
+ assert(RepOrWidenR == Store->getAddr() ||
+ vputils::isSingleScalar(Store->getStoredValue()));
+ return true;
+ }
+
+ if (auto *VPI = dyn_cast<VPInstruction>(U)) {
+ unsigned Opcode = VPI->getOpcode();
+ if (Opcode == VPInstruction::ExtractLastElement ||
+ Opcode == VPInstruction::ExtractLastLanePerPart ||
+ Opcode == VPInstruction::ExtractPenultimateElement)
+ return true;
+ }
+
+ return U->usesScalars(RepOrWidenR);
}))
continue;
@@ -1745,17 +1761,17 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
if (match(Term, m_BranchOnCount()) ||
match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask(
m_VPValue(), m_VPValue(), m_VPValue()))))) {
- // Try to simplify the branch condition if TC <= VF * UF when the latch
- // terminator is BranchOnCount or BranchOnCond where the input is
- // Not(ActiveLaneMask).
- const SCEV *TripCount =
- vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
- assert(!isa<SCEVCouldNotCompute>(TripCount) &&
+ // Try to simplify the branch condition if VectorTC <= VF * UF when the
+ // latch terminator is BranchOnCount or BranchOnCond(Not(ActiveLaneMask)).
+ const SCEV *VectorTripCount =
+ vputils::getSCEVExprForVPValue(&Plan.getVectorTripCount(), SE);
+ if (isa<SCEVCouldNotCompute>(VectorTripCount))
+ VectorTripCount = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
+ assert(!isa<SCEVCouldNotCompute>(VectorTripCount) &&
"Trip count SCEV must be computable");
ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
- const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
- if (TripCount->isZero() ||
- !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
+ const SCEV *C = SE.getElementCount(VectorTripCount->getType(), NumElements);
+ if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, VectorTripCount, C))
return false;
} else if (match(Term, m_BranchOnCond(m_VPValue(Cond)))) {
// For BranchOnCond, check if we can prove the condition to be true using VF
@@ -4131,13 +4147,13 @@ VPlanTransforms::expandSCEVs(VPlan &Plan, ScalarEvolution &SE) {
/// is defined at \p Idx of a load interleave group.
static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx,
VPValue *OpV, unsigned Idx) {
- auto *DefR = OpV->getDefiningRecipe();
- if (!DefR)
- return WideMember0->getOperand(OpIdx) == OpV;
- if (auto *W = dyn_cast<VPWidenLoadRecipe>(DefR))
- return !W->getMask() && WideMember0->getOperand(OpIdx) == OpV;
-
- if (auto *IR = dyn_cast<VPInterleaveRecipe>(DefR))
+ VPValue *Member0Op = WideMember0->getOperand(OpIdx);
+ VPRecipeBase *Member0OpR = Member0Op->getDefiningRecipe();
+ if (!Member0OpR)
+ return Member0Op == OpV;
+ if (auto *W = dyn_cast<VPWidenLoadRecipe>(Member0OpR))
+ return !W->getMask() && Member0Op == OpV;
+ if (auto *IR = dyn_cast<VPInterleaveRecipe>(Member0OpR))
return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
return false;
}