aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp2
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp34
-rw-r--r--llvm/lib/Transforms/IPO/PartialInlining.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp24
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp138
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp6
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp2
7 files changed, 142 insertions, 66 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 9b9e2ba..9150b58 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -459,7 +459,7 @@ void TruncInstCombine::ReduceExpressionGraph(Type *SclTy) {
Value *Op0 = I->getOperand(0);
Value *LHS = getReducedOperand(I->getOperand(1), SclTy);
Value *RHS = getReducedOperand(I->getOperand(2), SclTy);
- Res = Builder.CreateSelect(Op0, LHS, RHS);
+ Res = Builder.CreateSelect(Op0, LHS, RHS, "", I);
break;
}
case Instruction::PHI: {
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index 9115946..f166fef 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -24,6 +24,9 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -33,6 +36,11 @@ using namespace llvm;
#define DEBUG_TYPE "coro-annotation-elide"
+// Hidden tuning knob: scaled by MinBlockCounterExecution to build the minimum
+// BranchProbability a call site must reach before the pass elides the callee.
+static cl::opt<float> CoroElideBranchRatio(
+    "coro-elide-branch-ratio", cl::init(0.55), cl::Hidden,
+    cl::desc("Minimum BranchProbability to consider eliding a coroutine."));
+extern cl::opt<unsigned> MinBlockCounterExecution;
+
static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
for (Instruction &I : F->getEntryBlock())
if (!isa<AllocaInst>(&I))
@@ -145,6 +153,30 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
+ BranchProbability MinBranchProbability(
+ static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
+ MinBlockCounterExecution);
+
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
+
+ auto Prob = BranchProbability::getBranchProbability(
+ BFI.getBlockFreq(CB->getParent()).getFrequency(),
+ BFI.getEntryFreq().getFrequency());
+
+ if (Prob < MinBranchProbability) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(
+ DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller)
+ << "'" << ore::NV("callee", Callee->getName())
+ << "' not elided in '"
+ << ore::NV("caller", Caller->getName())
+ << "' because of low probability: "
+ << ore::NV("probability", Prob) << " (threshold: "
+ << ore::NV("threshold", MinBranchProbability) << ")";
+ });
+ continue;
+ }
+
auto *CallerN = CG.lookup(*Caller);
auto *CallerC = CallerN ? CG.lookupSCC(*CallerN) : nullptr;
// If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller
@@ -156,7 +188,7 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' elided in '" << ore::NV("caller", Caller->getName())
- << "'";
+ << "' (probability: " << ore::NV("probability", Prob) << ")";
});
FAM.invalidate(*Caller, PreservedAnalyses::none());
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 2583249..1a00d17 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio(
"outline candidate and original function"));
// Used to tune the minimum number of execution counts needed in the predecessor
// block to the cold edge. ie. confidence interval.
-static cl::opt<unsigned>
+cl::opt<unsigned> // No longer file-local: CoroAnnotationElide.cpp declares this option `extern`.
MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
cl::desc("Minimum block executions to consider "
"its BranchProbabilityInfo valid"));
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 9b272c4..3ddf182 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -28,6 +28,10 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes; // Declaration only; consulted when choosing the metadata source for logical and/or.
+} // namespace llvm
+
/// This is the complement of getICmpCode, which turns an opcode and two
/// operands into either a constant true or false, or a brand new ICmp
/// instruction. The sign is passed in to determine which kind of predicate to
@@ -1272,7 +1276,8 @@ Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) {
static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
bool IsAnd, bool IsLogical,
InstCombiner::BuilderTy &Builder,
- const SimplifyQuery &Q) {
+ const SimplifyQuery &Q,
+ Instruction &I) {
// Match an equality compare with a non-poison constant as Cmp0.
// Also, give up if the compare can be constant-folded to avoid looping.
CmpPredicate Pred0;
@@ -1306,9 +1311,12 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
SubstituteCmp = Builder.CreateICmp(Pred1, Y, C);
}
- if (IsLogical)
- return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp)
- : Builder.CreateLogicalOr(Cmp0, SubstituteCmp);
+ if (IsLogical) {
+ Instruction *MDFrom =
+ ProfcheckDisableMetadataFixes && isa<SelectInst>(I) ? nullptr : &I;
+ return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp, "", MDFrom)
+ : Builder.CreateLogicalOr(Cmp0, SubstituteCmp, "", MDFrom);
+ }
return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0,
SubstituteCmp);
}
@@ -3396,13 +3404,13 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
/*IsLogical*/ false, Builder))
return V;
- if (Value *V =
- foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q))
+ if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical,
+ Builder, Q, I))
return V;
// We can convert this case to bitwise and, because both operands are used
// on the LHS, and as such poison from both will propagate.
- if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd,
- /*IsLogical=*/false, Builder, Q)) {
+ if (Value *V = foldAndOrOfICmpsWithConstEq(
+ RHS, LHS, IsAnd, /*IsLogical=*/false, Builder, Q, I)) {
// If RHS is still used, we should drop samesign flag.
if (IsLogical && RHS->hasSameSign() && !RHS->use_empty()) {
RHS->setSameSign(false);
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 3f7003d..f4e05a2 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -389,6 +389,22 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
return OS;
}
+/// Look up the block that \p Switch branches to for the state \p NextState.
+/// The first case whose value equals \p NextState wins; when no case carries
+/// that value the switch's default destination is returned instead.
+static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch,
+                                        const APInt &NextState) {
+  for (auto Case : Switch->cases())
+    if (NextState == Case.getCaseValue()->getValue())
+      return Case.getCaseSuccessor();
+
+  // No explicit case matched this state.
+  return Switch->getDefaultDest();
+}
+
namespace {
/// ThreadingPath is a path in the control flow of a loop that can be threaded
/// by cloning necessary basic blocks and replacing conditional branches with
@@ -401,6 +417,10 @@ struct ThreadingPath {
ExitVal = V->getValue();
IsExitValSet = true;
}
+ void setExitValue(const APInt &V) { // Set the exit value directly from an APInt.
+ ExitVal = V;
+ IsExitValSet = true; // isExitValueSet() reports this flag.
+ }
bool isExitValueSet() const { return IsExitValSet; }
/// Determinator is the basic block that determines the next state of the DFA.
@@ -583,44 +603,8 @@ struct AllSwitchPaths {
BasicBlock *getSwitchBlock() { return SwitchBlock; }
void run() {
- StateDefMap StateDef = getStateDefMap();
- if (StateDef.empty()) {
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
- Switch)
- << "Switch instruction is not predictable.";
- });
- return;
- }
-
- auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
- auto *SwitchPhiDefBB = SwitchPhi->getParent();
- VisitedBlocks VB;
- // Get paths from the determinator BBs to SwitchPhiDefBB
- std::vector<ThreadingPath> PathsToPhiDef =
- getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths);
- if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) {
- TPaths = std::move(PathsToPhiDef);
- return;
- }
-
- assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty());
- auto PathsLimit = MaxNumPaths / PathsToPhiDef.size();
- // Find and append paths from SwitchPhiDefBB to SwitchBlock.
- PathsType PathsToSwitchBB =
- paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit);
- if (PathsToSwitchBB.empty())
- return;
-
- std::vector<ThreadingPath> TempList;
- for (const ThreadingPath &Path : PathsToPhiDef) {
- for (const PathType &PathToSw : PathsToSwitchBB) {
- ThreadingPath PathCopy(Path);
- PathCopy.appendExcludingFirst(PathToSw);
- TempList.push_back(PathCopy);
- }
- }
- TPaths = std::move(TempList);
+ findTPaths(); // Step 1: populate TPaths (the logic removed above now lives in this helper).
+ unifyTPaths(); // Step 2: give paths that reach the same switch destination one shared exit state.
}
private:
@@ -812,6 +796,69 @@ private:
return Res;
}
+  /// Find all threadable paths and store them in TPaths.
+  ///
+  /// Emits a missed-optimization remark and leaves TPaths untouched when the
+  /// state definition map is empty. Otherwise paths from the determinator
+  /// blocks to the block defining the switch PHI are collected first; if that
+  /// block is not the switch block itself, every such path is extended with
+  /// every path from the PHI's block to the switch block.
+  void findTPaths() {
+    StateDefMap StateDef = getStateDefMap();
+    if (StateDef.empty()) {
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
+                                        Switch)
+               << "Switch instruction is not predictable.";
+      });
+      return;
+    }
+
+    auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
+    auto *SwitchPhiDefBB = SwitchPhi->getParent();
+    VisitedBlocks VB;
+    // Get paths from the determinator BBs to SwitchPhiDefBB
+    std::vector<ThreadingPath> PathsToPhiDef =
+        getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths);
+    if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) {
+      TPaths = std::move(PathsToPhiDef);
+      return;
+    }
+
+    assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty());
+    // Split the remaining path budget evenly across the prefixes found so far.
+    auto PathsLimit = MaxNumPaths / PathsToPhiDef.size();
+    // Find and append paths from SwitchPhiDefBB to SwitchBlock.
+    PathsType PathsToSwitchBB =
+        paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit);
+    if (PathsToSwitchBB.empty())
+      return;
+
+    // Build the cross product of prefixes and suffixes. The final size is
+    // known up front, so allocate once and move each combined path into place
+    // instead of copying it a second time.
+    std::vector<ThreadingPath> TempList;
+    TempList.reserve(PathsToPhiDef.size() * PathsToSwitchBB.size());
+    for (const ThreadingPath &Path : PathsToPhiDef) {
+      for (const PathType &PathToSw : PathsToSwitchBB) {
+        ThreadingPath PathCopy(Path);
+        PathCopy.appendExcludingFirst(PathToSw);
+        TempList.push_back(std::move(PathCopy));
+      }
+    }
+    TPaths = std::move(TempList);
+  }
+
+  /// Two states are equivalent if they have the same switch destination.
+  /// Unify the states in different threading paths if the states are
+  /// equivalent: the first exit state seen for a destination becomes the
+  /// representative, and later paths reaching that destination with a
+  /// different exit state are rewritten to use the representative.
+  void unifyTPaths() {
+    llvm::SmallDenseMap<BasicBlock *, APInt> DestToState;
+    for (ThreadingPath &Path : TPaths) {
+      APInt NextState = Path.getExitValue();
+      BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState);
+      // One lookup: records NextState as the representative for Dest when
+      // Dest is new, otherwise yields the representative stored earlier.
+      auto [StateIt, Inserted] = DestToState.try_emplace(Dest, NextState);
+      if (Inserted)
+        continue;
+
+      if (NextState != StateIt->second) {
+        LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to "
+                          << StateIt->second << "\n");
+        Path.setExitValue(StateIt->second);
+      }
+    }
+  }
+
unsigned NumVisited = 0;
SwitchInst *Switch;
BasicBlock *SwitchBlock;
@@ -1335,21 +1382,6 @@ private:
return It != ClonedBBs.end() ? (*It).BB : nullptr;
}
- /// Helper to get the successor corresponding to a particular case value for
- /// a switch statement.
- BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
- BasicBlock *NextCase = nullptr;
- for (auto Case : Switch->cases()) {
- if (Case.getCaseValue()->getValue() == NextState) {
- NextCase = Case.getCaseSuccessor();
- break;
- }
- }
- if (!NextCase)
- NextCase = Switch->getDefaultDest();
- return NextCase;
- }
-
/// Returns true if IncomingBB is a predecessor of BB.
bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
return llvm::is_contained(predecessors(BB), IncomingBB);
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 155fcc5..9ac3be1 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5959,7 +5959,11 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
unsigned PreviousEdges = OtherCases->size();
if (OtherDest == SI->getDefaultDest())
++PreviousEdges;
- for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ unsigned E = PreviousEdges - 1;
+ // Remove all incoming values from OtherDest if OtherDest is unreachable.
+ if (NewBI->isUnconditional())
+ ++E;
+ for (unsigned I = 0; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3f16b03..e62d57e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5696,7 +5696,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
Instruction *I = Worklist.pop_back_val();
for (auto &Op : I->operands())
if (auto *InstOp = dyn_cast<Instruction>(Op))
- if ((InstOp->getParent() == I->getParent()) && !isa<PHINode>(InstOp) &&
+ if (TheLoop->contains(InstOp) && !isa<PHINode>(InstOp) &&
AddrDefs.insert(InstOp).second)
Worklist.push_back(InstOp);
}