aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Analysis/ScalarEvolution.cpp
diff options
context:
space:
mode:
authorJulian Nagele <j.nagele@apple.com>2024-11-15 10:03:08 +0000
committerGitHub <noreply@github.com>2024-11-15 10:03:08 +0000
commit7c8e05aa45f006401b71b37127537c4682fe16ee (patch)
tree09d92eec749187604be4c7be708b0156e87e35ee /llvm/lib/Analysis/ScalarEvolution.cpp
parente5ac9145ba2951b6454b13499f375284bdbde689 (diff)
downloadllvm-7c8e05aa45f006401b71b37127537c4682fe16ee.zip
llvm-7c8e05aa45f006401b71b37127537c4682fe16ee.tar.gz
llvm-7c8e05aa45f006401b71b37127537c4682fe16ee.tar.bz2
[SCEV] Collect and merge loop guards through PHI nodes with multiple incoming values (#113915)
This patch aims to strengthen collection of loop guards by processing PHI nodes with multiple incoming values as follows: collect guards for all incoming values/blocks and try to merge them into a single one for the PHI node. The goal is to determine tighter bounds on the trip counts of scalar tail loops after vectorization, helping to avoid unnecessary transforms. In particular we'd like to avoid vectorizing scalar tails of hand-vectorized loops, for example in [Transforms/PhaseOrdering/X86/pr38280.ll](https://github.com/llvm/llvm-project/blob/231e03ba7e82896847dbc27d457dbb208f04699c/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll), discovered via https://github.com/llvm/llvm-project/pull/108190 Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=a55248789ed3f653740e0723d016203b9d585f26&to=500e4c46e79f60b93b11a752698c520e345948e3&stat=instructions:u PR: https://github.com/llvm/llvm-project/pull/113915
Diffstat (limited to 'llvm/lib/Analysis/ScalarEvolution.cpp')
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp111
1 files changed, 101 insertions, 10 deletions
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b108111..5182285 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -222,6 +222,10 @@ static cl::opt<unsigned> RangeIterThreshold(
cl::desc("Threshold for switching to iteratively computing SCEV ranges"),
cl::init(32));
+static cl::opt<unsigned> MaxLoopGuardCollectionDepth(
+ "scalar-evolution-max-loop-guard-collection-depth", cl::Hidden,
+ cl::desc("Maximum depth for recrusive loop guard collection"), cl::init(1));
+
static cl::opt<bool>
ClassifyExpressions("scalar-evolution-classify-expressions",
cl::Hidden, cl::init(true),
@@ -10666,7 +10670,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB)
if (const Loop *L = LI.getLoopFor(BB))
return {L->getLoopPredecessor(), L->getHeader()};
- return {nullptr, nullptr};
+ return {nullptr, BB};
}
/// SCEV structural equivalence is usually sufficient for testing whether two
@@ -15245,7 +15249,81 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
ScalarEvolution::LoopGuards
ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Pred = L->getLoopPredecessor();
LoopGuards Guards(SE);
+ SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
+ collectFromBlock(SE, Guards, Header, Pred, VisitedBlocks);
+ return Guards;
+}
+
+void ScalarEvolution::LoopGuards::collectFromPHI(
+ ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
+ const PHINode &Phi, SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks,
+ SmallDenseMap<const BasicBlock *, LoopGuards> &IncomingGuards,
+ unsigned Depth) {
+ if (!SE.isSCEVable(Phi.getType()))
+ return;
+
+ using MinMaxPattern = std::pair<const SCEVConstant *, SCEVTypes>;
+ auto GetMinMaxConst = [&](unsigned IncomingIdx) -> MinMaxPattern {
+ const BasicBlock *InBlock = Phi.getIncomingBlock(IncomingIdx);
+ if (!VisitedBlocks.insert(InBlock).second)
+ return {nullptr, scCouldNotCompute};
+ auto [G, Inserted] = IncomingGuards.try_emplace(InBlock, LoopGuards(SE));
+ if (Inserted)
+ collectFromBlock(SE, G->second, Phi.getParent(), InBlock, VisitedBlocks,
+ Depth + 1);
+ auto S = G->second.RewriteMap.find(
+ SE.getSCEV(Phi.getIncomingValue(IncomingIdx)));
+ if (S == G->second.RewriteMap.end())
+ return {nullptr, scCouldNotCompute};
+ auto *SM = dyn_cast_if_present<SCEVMinMaxExpr>(S->second);
+ if (!SM)
+ return {nullptr, scCouldNotCompute};
+ if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(SM->getOperand(0)))
+ return {C0, SM->getSCEVType()};
+ if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(SM->getOperand(1)))
+ return {C1, SM->getSCEVType()};
+ return {nullptr, scCouldNotCompute};
+ };
+ auto MergeMinMaxConst = [](MinMaxPattern P1,
+ MinMaxPattern P2) -> MinMaxPattern {
+ auto [C1, T1] = P1;
+ auto [C2, T2] = P2;
+ if (!C1 || !C2 || T1 != T2)
+ return {nullptr, scCouldNotCompute};
+ switch (T1) {
+ case scUMaxExpr:
+ return {C1->getAPInt().ult(C2->getAPInt()) ? C1 : C2, T1};
+ case scSMaxExpr:
+ return {C1->getAPInt().slt(C2->getAPInt()) ? C1 : C2, T1};
+ case scUMinExpr:
+ return {C1->getAPInt().ugt(C2->getAPInt()) ? C1 : C2, T1};
+ case scSMinExpr:
+ return {C1->getAPInt().sgt(C2->getAPInt()) ? C1 : C2, T1};
+ default:
+ llvm_unreachable("Trying to merge non-MinMaxExpr SCEVs.");
+ }
+ };
+ auto P = GetMinMaxConst(0);
+ for (unsigned int In = 1; In < Phi.getNumIncomingValues(); In++) {
+ if (!P.first)
+ break;
+ P = MergeMinMaxConst(P, GetMinMaxConst(In));
+ }
+ if (P.first) {
+ const SCEV *LHS = SE.getSCEV(const_cast<PHINode *>(&Phi));
+ SmallVector<const SCEV *, 2> Ops({P.first, LHS});
+ const SCEV *RHS = SE.getMinMaxExpr(P.second, Ops);
+ Guards.RewriteMap.insert({LHS, RHS});
+ }
+}
+
+void ScalarEvolution::LoopGuards::collectFromBlock(
+ ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
+ const BasicBlock *Block, const BasicBlock *Pred,
+ SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks, unsigned Depth) {
SmallVector<const SCEV *> ExprsToRewrite;
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
const SCEV *RHS,
@@ -15584,14 +15662,13 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
}
};
- BasicBlock *Header = L->getHeader();
SmallVector<PointerIntPair<Value *, 1, bool>> Terms;
// First, collect information from assumptions dominating the loop.
for (auto &AssumeVH : SE.AC.assumptions()) {
if (!AssumeVH)
continue;
auto *AssumeI = cast<CallInst>(AssumeVH);
- if (!SE.DT.dominates(AssumeI, Header))
+ if (!SE.DT.dominates(AssumeI, Block))
continue;
Terms.emplace_back(AssumeI->getOperand(0), true);
}
@@ -15602,8 +15679,8 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
if (GuardDecl)
for (const auto *GU : GuardDecl->users())
if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
- if (Guard->getFunction() == Header->getParent() &&
- SE.DT.dominates(Guard, Header))
+ if (Guard->getFunction() == Block->getParent() &&
+ SE.DT.dominates(Guard, Block))
Terms.emplace_back(Guard->getArgOperand(0), true);
// Third, collect conditions from dominating branches. Starting at the loop
@@ -15611,11 +15688,10 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
// predecessors that can be found that have unique successors leading to the
// original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
- for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
- L->getLoopPredecessor(), Header);
- Pair.first;
+ std::pair<const BasicBlock *, const BasicBlock *> Pair(Pred, Block);
+ for (; Pair.first;
Pair = SE.getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
-
+ VisitedBlocks.insert(Pair.second);
const BranchInst *LoopEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
@@ -15623,6 +15699,22 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
Terms.emplace_back(LoopEntryPredicate->getCondition(),
LoopEntryPredicate->getSuccessor(0) == Pair.second);
+
+ // If we are recursively collecting guards stop after 2
+ // predecessors to limit compile-time impact for now.
+ if (Depth > 0 && Terms.size() == 2)
+ break;
+ }
+ // Finally, if we stopped climbing the predecessor chain because
+ // there wasn't a unique one to continue, try to collect conditions
+ // for PHINodes by recursively following all of their incoming
+ // blocks and try to merge the found conditions to build a new one
+ // for the Phi.
+ if (Pair.second->hasNPredecessorsOrMore(2) &&
+ Depth < MaxLoopGuardCollectionDepth) {
+ SmallDenseMap<const BasicBlock *, LoopGuards> IncomingGuards;
+ for (auto &Phi : Pair.second->phis())
+ collectFromPHI(SE, Guards, Phi, VisitedBlocks, IncomingGuards, Depth);
}
// Now apply the information from the collected conditions to
@@ -15679,7 +15771,6 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
Guards.RewriteMap.insert({Expr, Guards.rewrite(RewriteTo)});
}
}
- return Guards;
}
const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {