Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--   llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp     |  2
-rw-r--r--   llvm/lib/Transforms/Scalar/EarlyCSE.cpp                 |  6
-rw-r--r--   llvm/lib/Transforms/Scalar/GVN.cpp                      |  8
-rw-r--r--   llvm/lib/Transforms/Scalar/GVNSink.cpp                  |  1
-rw-r--r--   llvm/lib/Transforms/Scalar/InferAlignment.cpp           | 17
-rw-r--r--   llvm/lib/Transforms/Scalar/LoopInterchange.cpp          | 49
-rw-r--r--   llvm/lib/Transforms/Scalar/LoopPassManager.cpp          |  5
-rw-r--r--   llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp    | 42
-rw-r--r--   llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp | 43
-rw-r--r--   llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp       | 40
-rw-r--r--   llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp     |  1
11 files changed, 118 insertions, 96 deletions
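
The recurring change in the hunks below tracks the masked load/store (and gather/scatter) intrinsics dropping their explicit alignment argument: the mask and passthrough operands move up by one position, and alignment is instead read from the pointer parameter's align attribute. A minimal sketch of reading the operands under the new layout (the helper name is illustrative and not part of this patch):

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Alignment.h"
#include <cassert>

using namespace llvm;

// Sketch only: assumes the updated llvm.masked.load operand layout
// (ptr %addr, <N x i1> %mask, <N x T> %passthru), with the alignment
// carried as a parameter attribute on the pointer argument.
static Align describeMaskedLoad(const IntrinsicInst *II) {
  assert(II->getIntrinsicID() == Intrinsic::masked_load);
  Value *Ptr = II->getArgOperand(0);      // pointer (unchanged)
  Value *Mask = II->getArgOperand(1);     // mask, previously operand 2
  Value *PassThru = II->getArgOperand(2); // passthrough, previously operand 3
  (void)Ptr; (void)Mask; (void)PassThru;
  // A missing align attribute conservatively defaults to 1.
  return II->getParamAlign(0).valueOrOne();
}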
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6141b6d..4ac1321 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -272,7 +272,7 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
   if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
     // Masks.
     // TODO: check that KillingII's mask is a superset of the DeadII's mask.
-    if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
+    if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
       return OW_Unknown;
   } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
     // Masks.
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 2afa7b7..e30f306 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1017,14 +1017,14 @@ private:
     };
     auto MaskOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
-        return II->getOperand(2);
+        return II->getOperand(1);
      if (II->getIntrinsicID() == Intrinsic::masked_store)
-        return II->getOperand(3);
+        return II->getOperand(2);
      llvm_unreachable("Unexpected IntrinsicInst");
    };
    auto ThruOp = [](const IntrinsicInst *II) {
      if (II->getIntrinsicID() == Intrinsic::masked_load)
-        return II->getOperand(3);
+        return II->getOperand(2);
      llvm_unreachable("Unexpected IntrinsicInst");
    };
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 42db424..72e1131 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2212,11 +2212,11 @@ bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
   if (!DepInst || !Dep.isLocal() || !Dep.isDef())
     return false;
 
-  Value *Mask = I->getOperand(2);
-  Value *Passthrough = I->getOperand(3);
+  Value *Mask = I->getOperand(1);
+  Value *Passthrough = I->getOperand(2);
   Value *StoreVal;
-  if (!match(DepInst, m_MaskedStore(m_Value(StoreVal), m_Value(), m_Value(),
-                                    m_Specific(Mask))) ||
+  if (!match(DepInst,
+             m_MaskedStore(m_Value(StoreVal), m_Value(), m_Specific(Mask))) ||
       StoreVal->getType() != I->getType())
     return false;
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index b9534def..a06f832 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -430,6 +430,7 @@ public:
     case Instruction::FPTrunc:
     case Instruction::FPExt:
     case Instruction::PtrToInt:
+    case Instruction::PtrToAddr:
     case Instruction::IntToPtr:
     case Instruction::BitCast:
     case Instruction::AddrSpaceCast:
diff --git a/llvm/lib/Transforms/Scalar/InferAlignment.cpp b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
index 995b803..39751c0 100644
--- a/llvm/lib/Transforms/Scalar/InferAlignment.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -45,25 +45,20 @@ static bool tryToImproveAlign(
     switch (II->getIntrinsicID()) {
     case Intrinsic::masked_load:
     case Intrinsic::masked_store: {
-      int AlignOpIdx = II->getIntrinsicID() == Intrinsic::masked_load ? 1 : 2;
-      Value *PtrOp = II->getIntrinsicID() == Intrinsic::masked_load
-                         ? II->getArgOperand(0)
-                         : II->getArgOperand(1);
+      unsigned PtrOpIdx = II->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1;
+      Value *PtrOp = II->getArgOperand(PtrOpIdx);
       Type *Type = II->getIntrinsicID() == Intrinsic::masked_load
                        ? II->getType()
                        : II->getArgOperand(0)->getType();
-      Align OldAlign =
-          cast<ConstantInt>(II->getArgOperand(AlignOpIdx))->getAlignValue();
+      Align OldAlign = II->getParamAlign(PtrOpIdx).valueOrOne();
       Align PrefAlign = DL.getPrefTypeAlign(Type);
       Align NewAlign = Fn(PtrOp, OldAlign, PrefAlign);
-      if (NewAlign <= OldAlign ||
-          NewAlign.value() > std::numeric_limits<uint32_t>().max())
+      if (NewAlign <= OldAlign)
         return false;
-      Value *V =
-          ConstantInt::get(Type::getInt32Ty(II->getContext()), NewAlign.value());
-      II->setOperand(AlignOpIdx, V);
+      II->addParamAttr(PtrOpIdx,
+                       Attribute::getWithAlignment(II->getContext(), NewAlign));
       return true;
     }
     default:
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 28ae4f0..9aaf6a5 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include <cassert>
 #include <utility>
@@ -1872,6 +1873,51 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
   InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
 }
 
+/// This deals with a corner case when an LCSSA phi node appears in a non-exit
+/// block: the outer loop latch block does not need to be the exit block of
+/// the inner loop. Consider a loop that was in LCSSA form, but then some
+/// transformation like loop-unswitch comes along and creates an empty block,
+/// where BB5 in this example is the outer loop latch block:
+///
+/// BB4:
+///   br label %BB5
+/// BB5:
+///   %old.cond.lcssa = phi i16 [ %cond, %BB4 ]
+///   br outer.header
+///
+/// Interchange then brings it into LCSSA form again, resulting in this chain
+/// of single-input phi nodes:
+///
+/// BB4:
+///   %new.cond.lcssa = phi i16 [ %cond, %BB3 ]
+///   br label %BB5
+/// BB5:
+///   %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ]
+///
+/// The problem is that interchange can reorder blocks BB4 and BB5, placing the
+/// use before the def if we don't check this. The solution is to simplify
+/// (remove) LCSSA phi nodes if they appear in non-exit blocks.
+///
+static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop) {
+  BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
+  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+
+  // Do not modify lcssa phis where they actually belong, i.e. in exit blocks.
+  if (OuterLoopLatch == InnerLoopExit)
+    return;
+
+  // Collect and remove phis in non-exit blocks if they have 1 input.
+  SmallVector<PHINode *, 8> Phis(
+      llvm::make_pointer_range(OuterLoopLatch->phis()));
+  for (PHINode *Phi : Phis) {
+    assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected");
+    LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi
+                      << "\n");
+    Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
+    Phi->eraseFromParent();
+  }
+}
+
 bool LoopInterchangeTransform::adjustLoopBranches() {
   LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
   std::vector<DominatorTree::UpdateType> DTUpdates;
@@ -1882,6 +1928,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
   assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
          InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
          InnerLoopPreHeader && "Guaranteed by loop-simplify form");
+
+  simplifyLCSSAPhis(OuterLoop, InnerLoop);
+
   // Ensure that both preheaders do not contain PHI nodes and have single
   // predecessors. This allows us to move them easily. We use
   // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 7da8586..d827e64 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -8,7 +8,6 @@
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 
 #include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -217,9 +216,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
   // Get the analysis results needed by loop passes.
   MemorySSA *MSSA =
       UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
-  BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
-                                ? (&AM.getResult<BlockFrequencyAnalysis>(F))
-                                : nullptr;
   LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
                                      AM.getResult<AssumptionAnalysis>(F),
                                      AM.getResult<DominatorTreeAnalysis>(F),
@@ -227,7 +223,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
                                      AM.getResult<ScalarEvolutionAnalysis>(F),
                                      AM.getResult<TargetLibraryAnalysis>(F),
                                      AM.getResult<TargetIRAnalysis>(F),
-                                     BFI,
                                      MSSA};
 
   // Setup the loop analysis manager from its proxy. It is important that
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index ed4e2b1..3487e81 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -97,6 +97,12 @@ static cl::opt<MatrixLayoutTy> MatrixLayout(
 static cl::opt<bool> PrintAfterTransposeOpt("matrix-print-after-transpose-opt",
                                             cl::init(false));
 
+static cl::opt<unsigned> SplitMatmulRemainderOverThreshold(
+    "matrix-split-matmul-remainder-over-threshold", cl::Hidden,
+    cl::desc("Illegal remainder vectors over this size in bits should be split "
+             "in the inner loop of matmul"),
+    cl::init(0));
+
 /// Helper function to either return Scope, if it is a subprogram or the
 /// attached subprogram for a local scope.
 static DISubprogram *getSubprogram(DIScope *Scope) {
@@ -1720,6 +1726,31 @@ public:
     ToRemove.push_back(MatMul);
   }
 
+  /// Given \p Remainder iterations of the matmul inner loop,
+  /// potentially lower \p BlockSize that is used for the underlying
+  /// vector.
+  unsigned capBlockSize(unsigned BlockSize, unsigned Remainder, Type *EltType) {
+    if (BlockSize <= Remainder)
+      return BlockSize;
+
+    // If the remainder is also a legal type just use it.
+    auto *VecTy = FixedVectorType::get(EltType, Remainder);
+    if (TTI.isTypeLegal(VecTy))
+      return Remainder;
+
+    // Similarly, if the vector is small enough that we don't want
+    // to split further.
+    if (VecTy->getPrimitiveSizeInBits() <= SplitMatmulRemainderOverThreshold)
+      return Remainder;
+
+    // Gradually lower the vectorization factor to cover the
+    // remainder.
+    do {
+      BlockSize /= 2;
+    } while (BlockSize > Remainder);
+    return BlockSize;
+  }
+
   /// Compute \p Result += \p A * \p B for input matrices with left-associating
   /// addition.
   ///
@@ -1757,10 +1788,8 @@ public:
       bool isSumZero = isa<ConstantAggregateZero>(Result.getColumn(J));
       for (unsigned I = 0; I < R; I += BlockSize) {
-        // Gradually lower the vectorization factor to cover the remainder.
-        while (I + BlockSize > R)
-          BlockSize /= 2;
-
+        // Lower block size to make sure we stay within bounds.
+        BlockSize = capBlockSize(BlockSize, R - I, Result.getElementType());
         Value *Sum = IsTiled ? Result.extractVector(I, J, BlockSize, Builder)
                              : nullptr;
         for (unsigned K = 0; K < M; ++K) {
@@ -1785,9 +1814,8 @@ public:
       unsigned BlockSize = VF;
       bool isSumZero = isa<ConstantAggregateZero>(Result.getRow(I));
       for (unsigned J = 0; J < C; J += BlockSize) {
-        // Gradually lower the vectorization factor to cover the remainder.
-        while (J + BlockSize > C)
-          BlockSize /= 2;
+        // Lower the vectorization factor to cover the remainder.
+        BlockSize = capBlockSize(BlockSize, C - J, Result.getElementType());
         Value *Sum = nullptr;
         for (unsigned K = 0; K < M; ++K) {
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index 42d6680..146e7d1 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -111,7 +111,7 @@ static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
 }
 
 // Translate a masked load intrinsic like
-// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr,
 //                               <16 x i1> %mask, <16 x i32> %passthru)
 // to a chain of basic blocks, with loading element one-by-one if
 // the appropriate mask bit is set
@@ -146,11 +146,10 @@ static void scalarizeMaskedLoad(const DataLayout &DL, bool HasBranchDivergence,
                                 CallInst *CI, DomTreeUpdater *DTU,
                                 bool &ModifiedDT) {
   Value *Ptr = CI->getArgOperand(0);
-  Value *Alignment = CI->getArgOperand(1);
-  Value *Mask = CI->getArgOperand(2);
-  Value *Src0 = CI->getArgOperand(3);
+  Value *Mask = CI->getArgOperand(1);
+  Value *Src0 = CI->getArgOperand(2);
 
-  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+  const Align AlignVal = CI->getParamAlign(0).valueOrOne();
 
   VectorType *VecType = cast<FixedVectorType>(CI->getType());
   Type *EltTy = VecType->getElementType();
@@ -290,7 +289,7 @@ static void scalarizeMaskedLoad(const DataLayout &DL, bool HasBranchDivergence,
 }
 
 // Translate a masked store intrinsic, like
-// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr,
 //                         <16 x i1> %mask)
 // to a chain of basic blocks, that stores element one-by-one if
 // the appropriate mask bit is set
@@ -320,10 +319,9 @@ static void scalarizeMaskedStore(const DataLayout &DL, bool HasBranchDivergence,
                                  bool &ModifiedDT) {
   Value *Src = CI->getArgOperand(0);
   Value *Ptr = CI->getArgOperand(1);
-  Value *Alignment = CI->getArgOperand(2);
-  Value *Mask = CI->getArgOperand(3);
+  Value *Mask = CI->getArgOperand(2);
 
-  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+  const Align AlignVal = CI->getParamAlign(1).valueOrOne();
 
   auto *VecType = cast<VectorType>(Src->getType());
   Type *EltTy = VecType->getElementType();
@@ -472,9 +470,8 @@ static void scalarizeMaskedGather(const DataLayout &DL,
                                   bool HasBranchDivergence, CallInst *CI,
                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
   Value *Ptrs = CI->getArgOperand(0);
-  Value *Alignment = CI->getArgOperand(1);
-  Value *Mask = CI->getArgOperand(2);
-  Value *Src0 = CI->getArgOperand(3);
+  Value *Mask = CI->getArgOperand(1);
+  Value *Src0 = CI->getArgOperand(2);
 
   auto *VecType = cast<FixedVectorType>(CI->getType());
   Type *EltTy = VecType->getElementType();
@@ -483,7 +480,7 @@ static void scalarizeMaskedGather(const DataLayout &DL,
   Instruction *InsertPt = CI;
   BasicBlock *IfBlock = CI->getParent();
   Builder.SetInsertPoint(InsertPt);
-  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+  Align AlignVal = CI->getParamAlign(0).valueOrOne();
 
   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
@@ -608,8 +605,7 @@ static void scalarizeMaskedScatter(const DataLayout &DL,
                                    DomTreeUpdater *DTU, bool &ModifiedDT) {
   Value *Src = CI->getArgOperand(0);
   Value *Ptrs = CI->getArgOperand(1);
-  Value *Alignment = CI->getArgOperand(2);
-  Value *Mask = CI->getArgOperand(3);
+  Value *Mask = CI->getArgOperand(2);
 
   auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
@@ -623,7 +619,7 @@ static void scalarizeMaskedScatter(const DataLayout &DL,
   Builder.SetInsertPoint(InsertPt);
   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
 
-  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+  Align AlignVal = CI->getParamAlign(1).valueOrOne();
   unsigned VectorWidth = SrcFVTy->getNumElements();
 
   // Shorten the way if the mask is a vector of constants.
@@ -1125,8 +1121,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
     case Intrinsic::masked_load:
       // Scalarize unsupported vector masked load
       if (TTI.isLegalMaskedLoad(
-              CI->getType(),
-              cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue(),
+              CI->getType(), CI->getParamAlign(0).valueOrOne(),
              cast<PointerType>(CI->getArgOperand(0)->getType())
                  ->getAddressSpace()))
        return false;
@@ -1135,18 +1130,15 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
     case Intrinsic::masked_store:
       if (TTI.isLegalMaskedStore(
               CI->getArgOperand(0)->getType(),
-              cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue(),
+              CI->getParamAlign(1).valueOrOne(),
              cast<PointerType>(CI->getArgOperand(1)->getType())
                  ->getAddressSpace()))
        return false;
      scalarizeMaskedStore(DL, HasBranchDivergence, CI, DTU, ModifiedDT);
      return true;
     case Intrinsic::masked_gather: {
-      MaybeAlign MA =
-          cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
+      Align Alignment = CI->getParamAlign(0).valueOrOne();
       Type *LoadTy = CI->getType();
-      Align Alignment = DL.getValueOrABITypeAlignment(MA,
-                                                      LoadTy->getScalarType());
       if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
           !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
         return false;
@@ -1154,11 +1146,8 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
       return true;
     }
     case Intrinsic::masked_scatter: {
-      MaybeAlign MA =
-          cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
+      Align Alignment = CI->getParamAlign(1).valueOrOne();
       Type *StoreTy = CI->getArgOperand(0)->getType();
-      Align Alignment = DL.getValueOrABITypeAlignment(MA,
-                                                      StoreTy->getScalarType());
       if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
           !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
                                            Alignment))
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index e4ba70d..5af6c96 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -27,7 +27,6 @@
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -3611,8 +3610,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
                          AssumptionCache &AC, AAResults &AA,
                          TargetTransformInfo &TTI, bool Trivial, bool NonTrivial,
                          ScalarEvolution *SE,
-                         MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
-                         BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
+                         MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
   assert(L.isRecursivelyLCSSAForm(DT, LI) &&
          "Loops must be in LCSSA form before unswitching.");
@@ -3652,35 +3650,6 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
   if (F->hasOptSize())
     return false;
 
-  // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
-  // of the loops L is nested in, and of the loops nested in L are all cold.
-  auto IsLoopNestCold = [&](const Loop *L) {
-    // Check L and all of its parent loops.
-    auto *Parent = L;
-    while (Parent) {
-      if (!PSI->isColdBlock(Parent->getHeader(), BFI))
-        return false;
-      Parent = Parent->getParentLoop();
-    }
-    // Next check all loops nested within L.
-    SmallVector<const Loop *, 4> Worklist;
-    llvm::append_range(Worklist, L->getSubLoops());
-    while (!Worklist.empty()) {
-      auto *CurLoop = Worklist.pop_back_val();
-      if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
-        return false;
-      llvm::append_range(Worklist, CurLoop->getSubLoops());
-    }
-    return true;
-  };
-
-  // Skip cold loops in cold loop nests, as unswitching them brings little
-  // benefit but increases the code size
-  if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
-    LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
-    return false;
-  }
-
   // Perform legality checks.
   if (!isSafeForNoNTrivialUnswitching(L, LI))
     return false;
@@ -3705,11 +3674,6 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
                                               LPMUpdater &U) {
   Function &F = *L.getHeader()->getParent();
   (void)F;
-  ProfileSummaryInfo *PSI = nullptr;
-  if (auto OuterProxy =
-          AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
-              .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
-    PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
                     << "\n");
@@ -3720,7 +3684,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
     AR.MSSA->verifyMemorySSA();
   }
   if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
-                    &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
+                    &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
     return PreservedAnalyses::all();
 
   if (AR.MSSA && VerifyMemorySSA)
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index fa66a03..23e1243 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -227,6 +227,7 @@ static InstructionCost ComputeSpeculationCost(const Instruction *I,
     case Instruction::Call:
     case Instruction::BitCast:
     case Instruction::PtrToInt:
+    case Instruction::PtrToAddr:
     case Instruction::IntToPtr:
     case Instruction::AddrSpaceCast:
     case Instruction::FPToUI:
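
The LowerMatrixIntrinsics change above replaces per-iteration halving of the block size with a helper that caps it once per remainder. A standalone sketch of that capping rule follows, with the target's type-legality query replaced by a stub so the arithmetic can be followed in isolation; the stub predicate and the example sizes are assumptions, not taken from the patch:

#include <cassert>
#include <cstdio>

// Stand-in for TTI.isTypeLegal(FixedVectorType::get(EltTy, NumElts)).
static bool isLegalVectorLength(unsigned NumElts) {
  return NumElts == 4 || NumElts == 8;
}

// Cap BlockSize so a partial block of Remainder elements stays in bounds:
// keep the remainder itself if it forms a "legal" vector, otherwise halve
// BlockSize until it fits.
static unsigned capBlockSize(unsigned BlockSize, unsigned Remainder) {
  if (BlockSize <= Remainder)
    return BlockSize;
  if (isLegalVectorLength(Remainder))
    return Remainder;
  do {
    BlockSize /= 2;
  } while (BlockSize > Remainder);
  return BlockSize;
}

int main() {
  // With 10 rows and an initial block of 8, the loop iterates at I = 0 and
  // I = 8; the second iteration sees Remainder = 2, which is not "legal"
  // under the stub, so the block is halved down to 2.
  assert(capBlockSize(8, 10) == 8);
  assert(capBlockSize(8, 2) == 2);
  std::printf("remainder of 2 handled with block size %u\n", capBlockSize(8, 2));
  return 0;
}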
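
For the ScalarizeMaskedMemIntrin hunks, the comments describe turning llvm.masked.load into a chain of per-element conditional blocks. The net effect of that expansion, written purely for illustration as a plain scalar loop (the pass itself emits branchy IR, not a loop):

#include <array>
#include <cstdio>

// Semantic sketch of a masked load over N consecutive elements: lanes whose
// mask bit is set read from memory, all other lanes keep the passthrough.
template <typename T, size_t N>
std::array<T, N> maskedLoad(const T *Addr, const std::array<bool, N> &Mask,
                            const std::array<T, N> &PassThru) {
  std::array<T, N> Result = PassThru;
  for (size_t I = 0; I < N; ++I)
    if (Mask[I])
      Result[I] = Addr[I]; // only masked-on lanes touch memory
  return Result;
}

int main() {
  int Data[4] = {10, 20, 30, 40};
  auto R = maskedLoad<int, 4>(Data, {true, false, true, false}, {0, 0, 0, 0});
  std::printf("%d %d %d %d\n", R[0], R[1], R[2], R[3]); // prints: 10 0 30 0
  return 0;
}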