Diffstat (limited to 'llvm/lib/Transforms/Scalar')
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp             | 54
 llvm/lib/Transforms/Scalar/LoopFuse.cpp                   | 34
 llvm/lib/Transforms/Scalar/LoopPassManager.cpp            |  9
 llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp         |  3
 llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp      | 59
 llvm/lib/Transforms/Scalar/NewGVN.cpp                     |  6
 llvm/lib/Transforms/Scalar/Reassociate.cpp                | 42
 llvm/lib/Transforms/Scalar/Reg2Mem.cpp                    |  6
 llvm/lib/Transforms/Scalar/SROA.cpp                       | 34
 llvm/lib/Transforms/Scalar/Scalarizer.cpp                 |  8
 llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp         | 40
 llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp       |  7
 llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp |  4
 llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp   |  4
 14 files changed, 174 insertions(+), 136 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c327311..7ebcc21 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
@@ -117,6 +118,10 @@ static cl::opt<bool>
 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                 cl::desc("Predicate conditions in read only loops"));
 
+static cl::opt<bool> LoopPredicationTraps(
+    "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
+    cl::desc("Predicate conditions that trap in loops with only local writes"));
+
 static cl::opt<bool>
 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                 cl::desc("Allow widening of indvars to eliminate s/zext"));
@@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
   return Changed;
 }
 
+static bool crashingBBWithoutEffect(const BasicBlock &BB) {
+  return llvm::all_of(BB, [](const Instruction &I) {
+    // TODO: for now this is overly restrictive, to make sure nothing in this
+    // BB can depend on the loop body.
+    // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
+    // load does not have a side effect, but we could have
+    //   %a = load ptr, ptr %ptr
+    //   %b = load i32, ptr %a
+    // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
+    // dereference by skipping over loop iterations.
+    if (const auto *CB = dyn_cast<CallBase>(&I)) {
+      if (CB->onlyAccessesInaccessibleMemory())
+        return true;
+    }
+    return isa<UnreachableInst>(I);
+  });
+}
+
 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
   SmallVector<BasicBlock*, 16> ExitingBlocks;
   L->getExitingBlocks(ExitingBlocks);
@@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
   // suggestions on how to improve this? I can obviously bail out for outer
   // loops, but that seems less than ideal. MemorySSA can find memory writes,
   // is that enough for *all* side effects?
+  bool HasThreadLocalSideEffects = false;
   for (BasicBlock *BB : L->blocks())
     for (auto &I : *BB)
       // TODO:isGuaranteedToTransfer
-      if (I.mayHaveSideEffects())
-        return false;
+      if (I.mayHaveSideEffects()) {
+        if (!LoopPredicationTraps)
+          return false;
+        HasThreadLocalSideEffects = true;
+        if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+          // Simple stores cannot be observed by other threads.
+          // If HasThreadLocalSideEffects is set, we check
+          // crashingBBWithoutEffect to make sure that the crashing BB cannot
+          // observe them either.
+          if (!SI->isSimple())
+            return false;
+        } else {
+          return false;
+        }
+      }
 
   bool Changed = false;
   // Finally, do the actual predication for all predicatable blocks. A couple
@@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
 
+    if (HasThreadLocalSideEffects) {
+      const BasicBlock *Unreachable = nullptr;
+      for (const BasicBlock *Succ : BI->successors()) {
+        if (isa<UnreachableInst>(Succ->getTerminator()))
+          Unreachable = Succ;
+      }
+      // Exit BBs which have one branch back into the loop and another one to
+      // a trap can still be optimized, because local side effects cannot
+      // be observed in the exit case (the trap). We could be smarter about
+      // this, but for now let's pattern match common cases that directly trap.
+      if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
+        return Changed;
+    }
     Value *NewCond;
     if (ExitCount == ExactBTC) {
       NewCond = L->contains(BI->getSuccessor(0)) ?
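For context, a sketch of the loop shape the new -indvars-predicate-loop-traps handling is aimed at (a hypothetical example, not taken from the patch or its tests): the loop's only side effect is a simple, thread-local store, and the exiting branch leads to a block that does nothing but trap, so skipping iterations cannot make the store visible on the exit path.

```cpp
// Hypothetical source-level example; it assumes __builtin_trap() lowers to a
// call to llvm.trap, which only writes inaccessible memory and is followed by
// unreachable, so the exit block would satisfy crashingBBWithoutEffect.
int sum_with_bounds_check(const int *A, int N, int *Scratch) {
  int Sum = 0;
  for (int I = 0; I < N; ++I) {
    if (I >= 1024)      // exiting branch; its non-loop successor only traps
      __builtin_trap();
    *Scratch = Sum;     // simple store: the loop's only side effect
    Sum += A[I];
  }
  return Sum;
}
```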
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 20733032..19eccb9 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -368,7 +368,7 @@ private:
     Valid = false;
   }
 
-  bool reportInvalidCandidate(llvm::Statistic &Stat) const {
+  bool reportInvalidCandidate(Statistic &Stat) const {
     using namespace ore;
     assert(L && Preheader && "Fusion candidate not initialized properly!");
 #if LLVM_ENABLE_STATS
@@ -445,6 +445,7 @@ struct FusionCandidateCompare {
            "No dominance relationship between these fusion candidates!");
   }
 };
+} // namespace
 
 using LoopVector = SmallVector<Loop *, 4>;
 
@@ -461,9 +462,15 @@ using LoopVector = SmallVector<Loop *, 4>;
 using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
 using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
 
-#if !defined(NDEBUG)
-static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
-                                     const FusionCandidate &FC) {
+#ifndef NDEBUG
+static void printLoopVector(const LoopVector &LV) {
+  dbgs() << "****************************\n";
+  for (const Loop *L : LV)
+    printLoop(*L, dbgs());
+  dbgs() << "****************************\n";
+}
+
+static raw_ostream &operator<<(raw_ostream &OS, const FusionCandidate &FC) {
   if (FC.isValid())
     OS << FC.Preheader->getName();
   else
@@ -472,8 +479,8 @@ static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
   return OS;
 }
 
-static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
-                                     const FusionCandidateSet &CandSet) {
+static raw_ostream &operator<<(raw_ostream &OS,
+                               const FusionCandidateSet &CandSet) {
   for (const FusionCandidate &FC : CandSet)
     OS << FC << '\n';
 
@@ -489,7 +496,9 @@ printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
     dbgs() << "****************************\n";
   }
 }
-#endif
+#endif // NDEBUG
+
+namespace {
 
 /// Collect all loops in function at the same nest level, starting at the
 /// outermost level.
@@ -550,15 +559,6 @@ private:
   LoopsOnLevelTy LoopsOnLevel;
 };
 
-#ifndef NDEBUG
-static void printLoopVector(const LoopVector &LV) {
-  dbgs() << "****************************\n";
-  for (auto *L : LV)
-    printLoop(*L, dbgs());
-  dbgs() << "****************************\n";
-}
-#endif
-
 struct LoopFuser {
 private:
   // Sets of control flow equivalent fusion candidates for a given nest level.
@@ -1850,7 +1850,7 @@ private:
   ///                <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
   template <typename RemarkKind>
   void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
-                        llvm::Statistic &Stat) {
+                        Statistic &Stat) {
     assert(FC0.Preheader && FC1.Preheader &&
            "Expecting valid fusion candidates");
     using namespace ore;
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 32078b1..d827e64 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -8,7 +8,6 @@
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -16,8 +15,6 @@
 
 using namespace llvm;
 
-namespace llvm {
-
 /// Explicitly specialize the pass manager's run method to handle loop nest
 /// structure updates.
 PreservedAnalyses
@@ -185,7 +182,6 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
   }
   return PA;
 }
-} // namespace llvm
 
 void FunctionToLoopPassAdaptor::printPipeline(
     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
@@ -193,6 +189,7 @@ void FunctionToLoopPassAdaptor::printPipeline(
   Pass->printPipeline(OS, MapClassName2PassName);
   OS << ')';
 }
+
 PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
                                                  FunctionAnalysisManager &AM) {
   // Before we even compute any loop analyses, first run a miniature function
@@ -219,9 +216,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
   // Get the analysis results needed by loop passes.
   MemorySSA *MSSA =
       UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
-  BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
-                                ? (&AM.getResult<BlockFrequencyAnalysis>(F))
-                                : nullptr;
   LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
                                      AM.getResult<AssumptionAnalysis>(F),
                                      AM.getResult<DominatorTreeAnalysis>(F),
@@ -229,7 +223,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
                                      AM.getResult<ScalarEvolutionAnalysis>(F),
                                      AM.getResult<TargetLibraryAnalysis>(F),
                                      AM.getResult<TargetIRAnalysis>(F),
-                                     BFI,
                                      MSSA};
 
   // Setup the loop analysis manager from its proxy. It is important that
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 448dc2b..f3e6cbf 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -540,8 +540,6 @@ bool LoopVersioningLICM::run(DominatorTree *DT) {
   return Changed;
 }
 
-namespace llvm {
-
 PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
                                               LoopStandardAnalysisResults &LAR,
                                               LPMUpdater &U) {
@@ -556,4 +554,3 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
     return PreservedAnalyses::all();
   return getLoopPassPreservedAnalyses();
 }
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 7cae94eb..3487e81 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -97,6 +97,12 @@ static cl::opt<MatrixLayoutTy> MatrixLayout(
 static cl::opt<bool> PrintAfterTransposeOpt("matrix-print-after-transpose-opt",
                                             cl::init(false));
 
+static cl::opt<unsigned> SplitMatmulRemainderOverThreshold(
+    "matrix-split-matmul-remainder-over-threshold", cl::Hidden,
+    cl::desc("Illegal remainder vectors over this size in bits should be split "
+             "in the inner loop of matmul"),
+    cl::init(0));
+
 /// Helper function to either return Scope, if it is a subprogram or the
 /// attached subprogram for a local scope.
 static DISubprogram *getSubprogram(DIScope *Scope) {
@@ -115,18 +121,16 @@ static bool isSplat(Value *V) {
 
 /// Match any mul operation (fp or integer).
 template <typename LTy, typename RTy>
-auto m_AnyMul(const LTy &L, const RTy &R) {
+static auto m_AnyMul(const LTy &L, const RTy &R) {
   return m_CombineOr(m_Mul(L, R), m_FMul(L, R));
 }
 
 /// Match any add operation (fp or integer).
 template <typename LTy, typename RTy>
-auto m_AnyAdd(const LTy &L, const RTy &R) {
+static auto m_AnyAdd(const LTy &L, const RTy &R) {
   return m_CombineOr(m_Add(L, R), m_FAdd(L, R));
 }
 
-namespace {
-
 // Given an element pointer \p BasePtr to the start of a (sub) matrix, compute
 // the start address of vector \p VecIdx with type (\p EltType x \p NumElements)
 // assuming \p Stride elements between start two consecutive vectors.
@@ -167,9 +171,9 @@ namespace {
 //      v_2_0 |v_2_1 |v_2_2 |v_2_3
 //      v_3_0 {v_3_1 {v_3_2 v_3_3
 //
-Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
-                         unsigned NumElements, Type *EltType,
-                         IRBuilder<> &Builder) {
+static Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
+                                unsigned NumElements, Type *EltType,
+                                IRBuilder<> &Builder) {
   assert((!isa<ConstantInt>(Stride) ||
           cast<ConstantInt>(Stride)->getZExtValue() >= NumElements) &&
@@ -338,6 +342,8 @@ computeShapeInfoForInst(Instruction *I,
   return std::nullopt;
 }
 
+namespace {
+
 /// LowerMatrixIntrinsics contains the methods used to lower matrix intrinsics.
 ///
 /// Currently, the lowering for each matrix intrinsic is done as follows:
@@ -371,7 +377,8 @@ class LowerMatrixIntrinsics {
   LoopInfo *LI = nullptr;
   OptimizationRemarkEmitter *ORE = nullptr;
 
-  /// Contains estimates of the number of operations (loads, stores, compute) required to lower a matrix operation.
+  /// Contains estimates of the number of operations (loads, stores, compute)
+  /// required to lower a matrix operation.
   struct OpInfoTy {
     /// Number of stores emitted to generate this matrix.
     unsigned NumStores = 0;
@@ -1719,6 +1726,31 @@ public:
     ToRemove.push_back(MatMul);
   }
 
+  /// Given \p Remainder iterations of the matmul inner loop,
+  /// potentially lower \p BlockSize that is used for the underlying
+  /// vector.
+  unsigned capBlockSize(unsigned BlockSize, unsigned Remainder, Type *EltType) {
+    if (BlockSize <= Remainder)
+      return BlockSize;
+
+    // If the remainder is also a legal type just use it.
+    auto *VecTy = FixedVectorType::get(EltType, Remainder);
+    if (TTI.isTypeLegal(VecTy))
+      return Remainder;
+
+    // Similarly, if the vector is small enough that we don't want
+    // to split further.
+    if (VecTy->getPrimitiveSizeInBits() <= SplitMatmulRemainderOverThreshold)
+      return Remainder;
+
+    // Gradually lower the vectorization factor to cover the
+    // remainder.
+    do {
+      BlockSize /= 2;
+    } while (BlockSize > Remainder);
+    return BlockSize;
+  }
+
   /// Compute \p Result += \p A * \p B for input matrices with left-associating
   /// addition.
   ///
@@ -1756,10 +1788,8 @@ public:
       bool isSumZero = isa<ConstantAggregateZero>(Result.getColumn(J));
 
       for (unsigned I = 0; I < R; I += BlockSize) {
-        // Gradually lower the vectorization factor to cover the remainder.
-        while (I + BlockSize > R)
-          BlockSize /= 2;
-
+        // Lower block size to make sure we stay within bounds.
+        BlockSize = capBlockSize(BlockSize, R - I, Result.getElementType());
         Value *Sum = IsTiled ? Result.extractVector(I, J, BlockSize, Builder)
                              : nullptr;
         for (unsigned K = 0; K < M; ++K) {
@@ -1784,9 +1814,8 @@ public:
     unsigned BlockSize = VF;
     bool isSumZero = isa<ConstantAggregateZero>(Result.getRow(I));
     for (unsigned J = 0; J < C; J += BlockSize) {
-      // Gradually lower the vectorization factor to cover the remainder.
-      while (J + BlockSize > C)
-        BlockSize /= 2;
+      // Lower the vectorization factor to cover the remainder.
+      BlockSize = capBlockSize(BlockSize, C - J, Result.getElementType());
 
       Value *Sum = nullptr;
       for (unsigned K = 0; K < M; ++K) {
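The remainder handling can be exercised standalone. Below is a minimal sketch of the capBlockSize policy with a stand-in for the TTI.isTypeLegal query; the stand-in legality rule and the threshold values in main are assumptions for illustration, not LLVM's actual target behavior.

```cpp
#include <cassert>

// Assumption for illustration: powers of two up to 8 lanes are "legal".
static bool isTypeLegalStandIn(unsigned Elts) {
  return Elts <= 8 && (Elts & (Elts - 1)) == 0;
}

static unsigned capBlockSize(unsigned BlockSize, unsigned Remainder,
                             unsigned EltBits, unsigned ThresholdBits) {
  if (BlockSize <= Remainder)
    return BlockSize;
  // Use the remainder directly if it forms a legal vector type...
  if (isTypeLegalStandIn(Remainder))
    return Remainder;
  // ...or if the resulting vector is small enough not to bother splitting.
  if (Remainder * EltBits <= ThresholdBits)
    return Remainder;
  // Otherwise halve the block size until it fits, as the old code did.
  do {
    BlockSize /= 2;
  } while (BlockSize > Remainder);
  return BlockSize;
}

int main() {
  // VF = 8, 3 elements left, f32 elements, threshold 128 bits:
  // 3 x f32 = 96 bits <= 128, so the remainder is handled as one vector.
  assert(capBlockSize(8, 3, 32, 128) == 3);
  // Threshold 0 (the option's default) keeps the old halving behavior
  // for illegal remainders: 8 -> 4 -> 2.
  assert(capBlockSize(8, 3, 32, 0) == 2);
  return 0;
}
```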
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 80aa98d..5a8f18a 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -160,9 +160,6 @@ static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true),
 //===----------------------------------------------------------------------===//
 // Anchor methods.
 
-namespace llvm {
-namespace GVNExpression {
-
 Expression::~Expression() = default;
 BasicExpression::~BasicExpression() = default;
 CallExpression::~CallExpression() = default;
@@ -171,9 +168,6 @@ StoreExpression::~StoreExpression() = default;
 AggregateValueExpression::~AggregateValueExpression() = default;
 PHIExpression::~PHIExpression() = default;
 
-} // end namespace GVNExpression
-} // end namespace llvm
-
 namespace {
 
 // Tarjan's SCC finding algorithm with Nuutila's improvements
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index ba58b8e..6d7ce36 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2623,32 +2623,32 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
 
 namespace {
 
-  class ReassociateLegacyPass : public FunctionPass {
-    ReassociatePass Impl;
+class ReassociateLegacyPass : public FunctionPass {
+  ReassociatePass Impl;
 
-  public:
-    static char ID; // Pass identification, replacement for typeid
+public:
+  static char ID; // Pass identification, replacement for typeid
 
-    ReassociateLegacyPass() : FunctionPass(ID) {
-      initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
-    }
+  ReassociateLegacyPass() : FunctionPass(ID) {
+    initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
 
-    bool runOnFunction(Function &F) override {
-      if (skipFunction(F))
-        return false;
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
 
-      FunctionAnalysisManager DummyFAM;
-      auto PA = Impl.run(F, DummyFAM);
-      return !PA.areAllPreserved();
-    }
+    FunctionAnalysisManager DummyFAM;
+    auto PA = Impl.run(F, DummyFAM);
+    return !PA.areAllPreserved();
+  }
 
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.setPreservesCFG();
-      AU.addPreserved<AAResultsWrapperPass>();
-      AU.addPreserved<BasicAAWrapperPass>();
-      AU.addPreserved<GlobalsAAWrapperPass>();
-    }
-  };
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addPreserved<AAResultsWrapperPass>();
+    AU.addPreserved<BasicAAWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
 
 } // end anonymous namespace
diff --git a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 30b27cb..7646624 100644
--- a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -107,9 +107,7 @@ PreservedAnalyses RegToMemPass::run(Function &F, FunctionAnalysisManager &AM) {
   return PA;
 }
 
-namespace llvm {
-
-void initializeRegToMemWrapperPassPass(PassRegistry &);
+namespace {
 
 class RegToMemWrapperPass : public FunctionPass {
 public:
@@ -136,7 +134,7 @@ public:
     return N != 0 || Changed;
   }
 };
-} // namespace llvm
+} // namespace
 
 INITIALIZE_PASS_BEGIN(RegToMemWrapperPass, "reg2mem", "", true, true)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index a692009..5c60fad 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -344,6 +344,12 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
                              uint64_t SliceSizeInBits, Instruction *OldInst,
                              Instruction *Inst, Value *Dest, Value *Value,
                              const DataLayout &DL) {
+  // If we want allocas to be migrated using this helper then we need to ensure
+  // that the BaseFragments map code still works. A simple solution would be
+  // to choose to always clone alloca dbg_assigns (rather than sometimes
+  // "stealing" them).
+  assert(!isa<AllocaInst>(Inst) && "Unexpected alloca");
+
   auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
   // Nothing to do if OldInst has no linked dbg.assign intrinsics.
   if (DVRAssignMarkerRange.empty())
@@ -429,11 +435,22 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
       Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
     }
 
-    ::Value *NewValue = Value ? Value : DbgAssign->getValue();
-    DbgVariableRecord *NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
-        DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
-                            Dest, DIExpression::get(Expr->getContext(), {}),
-                            DbgAssign->getDebugLoc())));
+    DbgVariableRecord *NewAssign;
+    if (IsSplit) {
+      ::Value *NewValue = Value ? Value : DbgAssign->getValue();
+      NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
+          DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
+                              Dest, DIExpression::get(Expr->getContext(), {}),
+                              DbgAssign->getDebugLoc())));
+    } else {
+      // The store is not split, simply steal the existing dbg_assign.
+      NewAssign = DbgAssign;
+      NewAssign->setAssignId(NewID); // FIXME: Can we avoid generating new IDs?
+      NewAssign->setAddress(Dest);
+      if (Value)
+        NewAssign->replaceVariableLocationOp(0u, Value);
+      assert(Expr == NewAssign->getExpression());
+    }
 
     // If we've updated the value but the original dbg.assign has an arglist
     // then kill it now - we can't use the requested new value.
@@ -464,9 +481,10 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
     // noted as slightly offset (in code) from the store. In practice this
     // should have little effect on the debugging experience due to the fact
     // that all the split stores should get the same line number.
-    NewAssign->moveBefore(DbgAssign->getIterator());
-
-    NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
+    if (NewAssign != DbgAssign) {
+      NewAssign->moveBefore(DbgAssign->getIterator());
+      NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
+    }
 
     LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
   };
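The unsplit-store path above is a steal rather than a clone. Here is a toy model of that decision, with ToyAssign standing in for LLVM's DbgVariableRecord; this is purely illustrative, the real code rewrites debug records and DIAssignID metadata in place rather than these structs.

```cpp
#include <cassert>
#include <list>
#include <string>

struct ToyAssign {
  unsigned AssignID;
  std::string Address;
};

// Split stores produce one new record per fragment, so the original must be
// cloned; an unsplit store can take over ("steal") the one existing record,
// rewriting its ID and address in place and leaving no duplicate behind.
ToyAssign &migrate(bool IsSplit, ToyAssign &Old, unsigned NewID,
                   const std::string &NewAddr, std::list<ToyAssign> &Records) {
  if (IsSplit)
    return Records.emplace_back(ToyAssign{NewID, NewAddr});
  Old.AssignID = NewID;
  Old.Address = NewAddr;
  return Old;
}

int main() {
  std::list<ToyAssign> Records{{1, "%old"}};
  ToyAssign &Orig = Records.front();
  // Unsplit store: the record count stays at one; the record is updated.
  ToyAssign &Stolen = migrate(false, Orig, 2, "%dest", Records);
  assert(&Stolen == &Orig && Records.size() == 1);
  // Split store: a fresh record is appended for the new fragment.
  ToyAssign &Cloned = migrate(true, Orig, 3, "%dest.frag", Records);
  assert(&Cloned != &Orig && Records.size() == 2);
  return 0;
}
```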
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index aae5d60..25a531c 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -50,9 +50,7 @@ using namespace llvm;
 
 #define DEBUG_TYPE "scalarizer"
 
-namespace {
-
-BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
+static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
   BasicBlock *BB = Itr->getParent();
   if (isa<PHINode>(Itr))
     Itr = BB->getFirstInsertionPt();
@@ -76,6 +74,8 @@ using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
 // along with a pointer to their scattered forms.
 using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
 
+namespace {
+
 struct VectorSplit {
   // The type of the vector.
   FixedVectorType *VecTy = nullptr;
@@ -196,6 +196,7 @@ struct VectorLayout {
   // The size of each (non-remainder) fragment in bytes.
   uint64_t SplitSize = 0;
 };
+} // namespace
 
 static bool isStructOfMatchingFixedVectors(Type *Ty) {
   if (!isa<StructType>(Ty))
@@ -268,6 +269,7 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
   return Res;
 }
 
+namespace {
 class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
 public:
   ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index e4ba70d..5af6c96 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -27,7 +27,6 @@
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -3611,8 +3610,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
                          AssumptionCache &AC, AAResults &AA,
                          TargetTransformInfo &TTI, bool Trivial,
                          bool NonTrivial, ScalarEvolution *SE,
-                         MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
-                         BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
+                         MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
   assert(L.isRecursivelyLCSSAForm(DT, LI) &&
          "Loops must be in LCSSA form before unswitching.");
 
@@ -3652,35 +3650,6 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
   if (F->hasOptSize())
     return false;
 
-  // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
-  // of the loops L is nested in, and of the loops nested in L are all cold.
-  auto IsLoopNestCold = [&](const Loop *L) {
-    // Check L and all of its parent loops.
-    auto *Parent = L;
-    while (Parent) {
-      if (!PSI->isColdBlock(Parent->getHeader(), BFI))
-        return false;
-      Parent = Parent->getParentLoop();
-    }
-    // Next check all loops nested within L.
-    SmallVector<const Loop *, 4> Worklist;
-    llvm::append_range(Worklist, L->getSubLoops());
-    while (!Worklist.empty()) {
-      auto *CurLoop = Worklist.pop_back_val();
-      if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
-        return false;
-      llvm::append_range(Worklist, CurLoop->getSubLoops());
-    }
-    return true;
-  };
-
-  // Skip cold loops in cold loop nests, as unswitching them brings little
-  // benefit but increases the code size
-  if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
-    LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
-    return false;
-  }
-
   // Perform legality checks.
   if (!isSafeForNoNTrivialUnswitching(L, LI))
     return false;
@@ -3705,11 +3674,6 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
                                               LPMUpdater &U) {
   Function &F = *L.getHeader()->getParent();
   (void)F;
-  ProfileSummaryInfo *PSI = nullptr;
-  if (auto OuterProxy =
-          AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
-              .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
-    PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
                     << "\n");
 
@@ -3720,7 +3684,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
     AR.MSSA->verifyMemorySSA();
   }
   if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
-                    &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
+                    &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
     return PreservedAnalyses::all();
 
   if (AR.MSSA && VerifyMemorySSA)
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index ebcbd2b..fa66a03 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -149,8 +149,6 @@ bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
   return Impl.runImpl(F, TTI);
 }
 
-namespace llvm {
-
 bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
   if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) {
     LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because "
@@ -328,11 +326,11 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
   return true;
 }
 
-FunctionPass *createSpeculativeExecutionPass() {
+FunctionPass *llvm::createSpeculativeExecutionPass() {
   return new SpeculativeExecutionLegacyPass();
 }
 
-FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
+FunctionPass *llvm::createSpeculativeExecutionIfHasBranchDivergencePass() {
   return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true);
 }
 
@@ -362,4 +360,3 @@ void SpeculativeExecutionPass::printPipeline(
     OS << "only-if-divergent-target";
   OS << '>';
 }
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 7d01709..e94ad19 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -716,8 +716,6 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
   return Ret;
 }
 
-namespace llvm {
-
 PreservedAnalyses
 StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
   const DataLayout *DL = &F.getDataLayout();
@@ -735,5 +733,3 @@ StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
   PA.preserve<TargetIRAnalysis>();
   return PA;
 }
-
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 1d83ddc..89d41f3e 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -192,7 +192,7 @@ struct AllocaDerivedValueTracker {
   SmallPtrSet<Instruction *, 32> AllocaUsers;
   SmallPtrSet<Instruction *, 32> EscapePoints;
 };
-}
+} // namespace
 
 static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
   if (F.callsFunctionThatReturnsTwice())
@@ -967,7 +967,7 @@ struct TailCallElim : public FunctionPass {
                         /*BFI=*/nullptr);
   }
 };
-}
+} // namespace
 
 char TailCallElim::ID = 0;
 
 INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",