diff options
Diffstat (limited to 'llvm/lib/Transforms')
9 files changed, 173 insertions, 80 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 805bdb4..bbbac45 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -28,8 +28,12 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -39,6 +43,10 @@ using namespace PatternMatch; #define DEBUG_TYPE "aggressive-instcombine" +namespace llvm { +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} + STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded"); STATISTIC(NumGuardedRotates, "Number of guarded rotates transformed into funnel shifts"); @@ -599,6 +607,14 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) { auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0)); auto Select = B.CreateSelect(Cmp, B.CreateZExt(ZeroTableElem, XType), Cttz); + // The true branch of select handles the cttz(0) case, which is rare. + if (!ProfcheckDisableMetadataFixes) { + if (Instruction *SelectI = dyn_cast<Instruction>(Select)) + SelectI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights()); + } + // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target // it should be handled as: `cttz(x) & (typeSize - 1)`. diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp index 9b9e2ba..9150b58 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp @@ -459,7 +459,7 @@ void TruncInstCombine::ReduceExpressionGraph(Type *SclTy) { Value *Op0 = I->getOperand(0); Value *LHS = getReducedOperand(I->getOperand(1), SclTy); Value *RHS = getReducedOperand(I->getOperand(2), SclTy); - Res = Builder.CreateSelect(Op0, LHS, RHS); + Res = Builder.CreateSelect(Op0, LHS, RHS, "", I); break; } case Instruction::PHI: { diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp index 9115946..f166fef 100644 --- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp @@ -24,6 +24,9 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -33,6 +36,11 @@ using namespace llvm; #define DEBUG_TYPE "coro-annotation-elide" +static cl::opt<float> CoroElideBranchRatio( + "coro-elide-branch-ratio", cl::init(0.55), cl::Hidden, + cl::desc("Minimum BranchProbability to consider a elide a coroutine.")); +extern cl::opt<unsigned> MinBlockCounterExecution; + static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) { for (Instruction &I : F->getEntryBlock()) if (!isa<AllocaInst>(&I)) @@ -145,6 +153,30 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine(); bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe); if (IsCallerPresplitCoroutine && HasAttr) { + BranchProbability MinBranchProbability( + static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution), + MinBlockCounterExecution); + + auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller); + + auto Prob = BranchProbability::getBranchProbability( + BFI.getBlockFreq(CB->getParent()).getFrequency(), + BFI.getEntryFreq().getFrequency()); + + if (Prob < MinBranchProbability) { + ORE.emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller) + << "'" << ore::NV("callee", Callee->getName()) + << "' not elided in '" + << ore::NV("caller", Caller->getName()) + << "' because of low probability: " + << ore::NV("probability", Prob) << " (threshold: " + << ore::NV("threshold", MinBranchProbability) << ")"; + }); + continue; + } + auto *CallerN = CG.lookup(*Caller); auto *CallerC = CallerN ? CG.lookupSCC(*CallerN) : nullptr; // If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller @@ -156,7 +188,7 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller) << "'" << ore::NV("callee", Callee->getName()) << "' elided in '" << ore::NV("caller", Caller->getName()) - << "'"; + << "' (probability: " << ore::NV("probability", Prob) << ")"; }); FAM.invalidate(*Caller, PreservedAnalyses::none()); diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 2583249..1a00d17 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio( "outline candidate and original function")); // Used to tune the minimum number of execution counts needed in the predecessor // block to the cold edge. ie. confidence interval. -static cl::opt<unsigned> +cl::opt<unsigned> MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid")); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 9b272c4..3ddf182 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -28,6 +28,10 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" +namespace llvm { +extern cl::opt<bool> ProfcheckDisableMetadataFixes; +} + /// This is the complement of getICmpCode, which turns an opcode and two /// operands into either a constant true or false, or a brand new ICmp /// instruction. The sign is passed in to determine which kind of predicate to @@ -1272,7 +1276,8 @@ Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) { static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd, bool IsLogical, InstCombiner::BuilderTy &Builder, - const SimplifyQuery &Q) { + const SimplifyQuery &Q, + Instruction &I) { // Match an equality compare with a non-poison constant as Cmp0. // Also, give up if the compare can be constant-folded to avoid looping. CmpPredicate Pred0; @@ -1306,9 +1311,12 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; SubstituteCmp = Builder.CreateICmp(Pred1, Y, C); } - if (IsLogical) - return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp) - : Builder.CreateLogicalOr(Cmp0, SubstituteCmp); + if (IsLogical) { + Instruction *MDFrom = + ProfcheckDisableMetadataFixes && isa<SelectInst>(I) ? nullptr : &I; + return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp, "", MDFrom) + : Builder.CreateLogicalOr(Cmp0, SubstituteCmp, "", MDFrom); + } return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0, SubstituteCmp); } @@ -3396,13 +3404,13 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, /*IsLogical*/ false, Builder)) return V; - if (Value *V = - foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q)) + if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, + Builder, Q, I)) return V; // We can convert this case to bitwise and, because both operands are used // on the LHS, and as such poison from both will propagate. - if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd, - /*IsLogical=*/false, Builder, Q)) { + if (Value *V = foldAndOrOfICmpsWithConstEq( + RHS, LHS, IsAnd, /*IsLogical=*/false, Builder, Q, I)) { // If RHS is still used, we should drop samesign flag. if (IsLogical && RHS->hasSameSign() && !RHS->use_empty()) { RHS->setSameSign(false); diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 5b8ea15..b74a070 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -1084,8 +1084,10 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, auto ThenTerm = SplitBlockAndInsertIfThen( IRB.CreateIsNull(Load), &*IP, false, MDBuilder(IRB.getContext()).createUnlikelyBranchWeights()); - IRBuilder<> ThenIRB(ThenTerm); + InstrumentationIRBuilder ThenIRB(ThenTerm); auto Store = ThenIRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr); + if (EntryLoc) + Store->setDebugLoc(EntryLoc); Load->setNoSanitizeMetadata(); Store->setNoSanitizeMetadata(); } @@ -1131,7 +1133,10 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, EstimatedStackSize >= Options.StackDepthCallbackMin) { if (InsertBefore) IRB.SetInsertPoint(InsertBefore); - IRB.CreateCall(SanCovStackDepthCallback)->setCannotMerge(); + auto Call = IRB.CreateCall(SanCovStackDepthCallback); + if (EntryLoc) + Call->setDebugLoc(EntryLoc); + Call->setCannotMerge(); } } else { // Check stack depth. If it's the deepest so far, record it. @@ -1144,8 +1149,10 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, auto ThenTerm = SplitBlockAndInsertIfThen( IsStackLower, &*IP, false, MDBuilder(IRB.getContext()).createUnlikelyBranchWeights()); - IRBuilder<> ThenIRB(ThenTerm); + InstrumentationIRBuilder ThenIRB(ThenTerm); auto Store = ThenIRB.CreateStore(FrameAddrInt, SanCovLowestStack); + if (EntryLoc) + Store->setDebugLoc(EntryLoc); LowestStack->setNoSanitizeMetadata(); Store->setNoSanitizeMetadata(); } diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index e448230..f4e05a2 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -61,6 +61,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DomTreeUpdater.h" @@ -382,19 +383,28 @@ typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { - OS << "< "; - for (const BasicBlock *BB : Path) { - std::string BBName; - if (BB->hasName()) - raw_string_ostream(BBName) << BB->getName(); - else - raw_string_ostream(BBName) << BB; - OS << BBName << " "; - } - OS << ">"; + auto BBNames = llvm::map_range( + Path, [](const BasicBlock *BB) { return BB->getNameOrAsOperand(); }); + OS << "< " << llvm::join(BBNames, ", ") << " >"; return OS; } +/// Helper to get the successor corresponding to a particular case value for +/// a switch statement. +static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, + const APInt &NextState) { + BasicBlock *NextCase = nullptr; + for (auto Case : Switch->cases()) { + if (Case.getCaseValue()->getValue() == NextState) { + NextCase = Case.getCaseSuccessor(); + break; + } + } + if (!NextCase) + NextCase = Switch->getDefaultDest(); + return NextCase; +} + namespace { /// ThreadingPath is a path in the control flow of a loop that can be threaded /// by cloning necessary basic blocks and replacing conditional branches with @@ -407,6 +417,10 @@ struct ThreadingPath { ExitVal = V->getValue(); IsExitValSet = true; } + void setExitValue(const APInt &V) { + ExitVal = V; + IsExitValSet = true; + } bool isExitValueSet() const { return IsExitValSet; } /// Determinator is the basic block that determines the next state of the DFA. @@ -423,7 +437,7 @@ struct ThreadingPath { } void print(raw_ostream &OS) const { - OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; + OS << Path << " [ " << ExitVal << ", " << DBB->getNameOrAsOperand() << " ]"; } private: @@ -589,44 +603,8 @@ struct AllSwitchPaths { BasicBlock *getSwitchBlock() { return SwitchBlock; } void run() { - StateDefMap StateDef = getStateDefMap(); - if (StateDef.empty()) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", - Switch) - << "Switch instruction is not predictable."; - }); - return; - } - - auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); - auto *SwitchPhiDefBB = SwitchPhi->getParent(); - VisitedBlocks VB; - // Get paths from the determinator BBs to SwitchPhiDefBB - std::vector<ThreadingPath> PathsToPhiDef = - getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths); - if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) { - TPaths = std::move(PathsToPhiDef); - return; - } - - assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty()); - auto PathsLimit = MaxNumPaths / PathsToPhiDef.size(); - // Find and append paths from SwitchPhiDefBB to SwitchBlock. - PathsType PathsToSwitchBB = - paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit); - if (PathsToSwitchBB.empty()) - return; - - std::vector<ThreadingPath> TempList; - for (const ThreadingPath &Path : PathsToPhiDef) { - for (const PathType &PathToSw : PathsToSwitchBB) { - ThreadingPath PathCopy(Path); - PathCopy.appendExcludingFirst(PathToSw); - TempList.push_back(PathCopy); - } - } - TPaths = std::move(TempList); + findTPaths(); + unifyTPaths(); } private: @@ -818,6 +796,69 @@ private: return Res; } + // Find all threadable paths. + void findTPaths() { + StateDefMap StateDef = getStateDefMap(); + if (StateDef.empty()) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", + Switch) + << "Switch instruction is not predictable."; + }); + return; + } + + auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); + auto *SwitchPhiDefBB = SwitchPhi->getParent(); + VisitedBlocks VB; + // Get paths from the determinator BBs to SwitchPhiDefBB + std::vector<ThreadingPath> PathsToPhiDef = + getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths); + if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) { + TPaths = std::move(PathsToPhiDef); + return; + } + + assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty()); + auto PathsLimit = MaxNumPaths / PathsToPhiDef.size(); + // Find and append paths from SwitchPhiDefBB to SwitchBlock. + PathsType PathsToSwitchBB = + paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit); + if (PathsToSwitchBB.empty()) + return; + + std::vector<ThreadingPath> TempList; + for (const ThreadingPath &Path : PathsToPhiDef) { + for (const PathType &PathToSw : PathsToSwitchBB) { + ThreadingPath PathCopy(Path); + PathCopy.appendExcludingFirst(PathToSw); + TempList.push_back(PathCopy); + } + } + TPaths = std::move(TempList); + } + + // Two states are equivalent if they have the same switch destination. + // Unify the states in different threading path if the states are equivalent. + void unifyTPaths() { + llvm::SmallDenseMap<BasicBlock *, APInt> DestToState; + for (ThreadingPath &Path : TPaths) { + APInt NextState = Path.getExitValue(); + BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState); + auto StateIt = DestToState.find(Dest); + if (StateIt == DestToState.end()) { + DestToState.insert({Dest, NextState}); + continue; + } + + if (NextState != StateIt->second) { + LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to " + << StateIt->second << "\n"); + Path.setExitValue(StateIt->second); + } + } + } + unsigned NumVisited = 0; SwitchInst *Switch; BasicBlock *SwitchBlock; @@ -1341,21 +1382,6 @@ private: return It != ClonedBBs.end() ? (*It).BB : nullptr; } - /// Helper to get the successor corresponding to a particular case value for - /// a switch statement. - BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) { - BasicBlock *NextCase = nullptr; - for (auto Case : Switch->cases()) { - if (Case.getCaseValue()->getValue() == NextState) { - NextCase = Case.getCaseSuccessor(); - break; - } - } - if (!NextCase) - NextCase = Switch->getDefaultDest(); - return NextCase; - } - /// Returns true if IncomingBB is a predecessor of BB. bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { return llvm::is_contained(predecessors(BB), IncomingBB); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 155fcc5..9ac3be1 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5959,7 +5959,11 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, unsigned PreviousEdges = OtherCases->size(); if (OtherDest == SI->getDefaultDest()) ++PreviousEdges; - for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) + unsigned E = PreviousEdges - 1; + // Remove all incoming values from OtherDest if OtherDest is unreachable. + if (NewBI->isUnconditional()) + ++E; + for (unsigned I = 0; I != E; ++I) cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3f16b03..e62d57e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5696,7 +5696,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { Instruction *I = Worklist.pop_back_val(); for (auto &Op : I->operands()) if (auto *InstOp = dyn_cast<Instruction>(Op)) - if ((InstOp->getParent() == I->getParent()) && !isa<PHINode>(InstOp) && + if (TheLoop->contains(InstOp) && !isa<PHINode>(InstOp) && AddrDefs.insert(InstOp).second) Worklist.push_back(InstOp); } |