aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/EarlyCSE.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp8
-rw-r--r--llvm/lib/Transforms/Scalar/GVNSink.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/InferAlignment.cpp17
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp49
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPassManager.cpp5
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp42
-rw-r--r--llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp43
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp40
-rw-r--r--llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp1
11 files changed, 118 insertions, 96 deletions
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6141b6d..4ac1321 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -272,7 +272,7 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
// Masks.
// TODO: check that KillingII's mask is a superset of the DeadII's mask.
- if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
+ if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
return OW_Unknown;
} else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
// Masks.
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 2afa7b7..e30f306 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1017,14 +1017,14 @@ private:
};
auto MaskOp = [](const IntrinsicInst *II) {
if (II->getIntrinsicID() == Intrinsic::masked_load)
- return II->getOperand(2);
+ return II->getOperand(1);
if (II->getIntrinsicID() == Intrinsic::masked_store)
- return II->getOperand(3);
+ return II->getOperand(2);
llvm_unreachable("Unexpected IntrinsicInst");
};
auto ThruOp = [](const IntrinsicInst *II) {
if (II->getIntrinsicID() == Intrinsic::masked_load)
- return II->getOperand(3);
+ return II->getOperand(2);
llvm_unreachable("Unexpected IntrinsicInst");
};
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 42db424..72e1131 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2212,11 +2212,11 @@ bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
if (!DepInst || !Dep.isLocal() || !Dep.isDef())
return false;
- Value *Mask = I->getOperand(2);
- Value *Passthrough = I->getOperand(3);
+ Value *Mask = I->getOperand(1);
+ Value *Passthrough = I->getOperand(2);
Value *StoreVal;
- if (!match(DepInst, m_MaskedStore(m_Value(StoreVal), m_Value(), m_Value(),
- m_Specific(Mask))) ||
+ if (!match(DepInst,
+ m_MaskedStore(m_Value(StoreVal), m_Value(), m_Specific(Mask))) ||
StoreVal->getType() != I->getType())
return false;
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index b9534def..a06f832 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -430,6 +430,7 @@ public:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::PtrToInt:
+ case Instruction::PtrToAddr:
case Instruction::IntToPtr:
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
diff --git a/llvm/lib/Transforms/Scalar/InferAlignment.cpp b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
index 995b803..39751c0 100644
--- a/llvm/lib/Transforms/Scalar/InferAlignment.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -45,25 +45,20 @@ static bool tryToImproveAlign(
switch (II->getIntrinsicID()) {
case Intrinsic::masked_load:
case Intrinsic::masked_store: {
- int AlignOpIdx = II->getIntrinsicID() == Intrinsic::masked_load ? 1 : 2;
- Value *PtrOp = II->getIntrinsicID() == Intrinsic::masked_load
- ? II->getArgOperand(0)
- : II->getArgOperand(1);
+ unsigned PtrOpIdx = II->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1;
+ Value *PtrOp = II->getArgOperand(PtrOpIdx);
Type *Type = II->getIntrinsicID() == Intrinsic::masked_load
? II->getType()
: II->getArgOperand(0)->getType();
- Align OldAlign =
- cast<ConstantInt>(II->getArgOperand(AlignOpIdx))->getAlignValue();
+ Align OldAlign = II->getParamAlign(PtrOpIdx).valueOrOne();
Align PrefAlign = DL.getPrefTypeAlign(Type);
Align NewAlign = Fn(PtrOp, OldAlign, PrefAlign);
- if (NewAlign <= OldAlign ||
- NewAlign.value() > std::numeric_limits<uint32_t>().max())
+ if (NewAlign <= OldAlign)
return false;
- Value *V =
- ConstantInt::get(Type::getInt32Ty(II->getContext()), NewAlign.value());
- II->setOperand(AlignOpIdx, V);
+ II->addParamAttr(PtrOpIdx,
+ Attribute::getWithAlignment(II->getContext(), NewAlign));
return true;
}
default:
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 28ae4f0..9aaf6a5 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -43,6 +43,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
#include <utility>
@@ -1872,6 +1873,51 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
}
+/// This deals with a corner case when a LCSSA phi node appears in a non-exit
+/// block: the outer loop latch block does not need to be the exit block of the
+/// inner loop. Consider a loop that was in LCSSA form, but then some
+/// transformation like loop-unswitch comes along and creates an empty block,
+/// where BB5 in this example is the outer loop latch block:
+///
+/// BB4:
+/// br label %BB5
+/// BB5:
+/// %old.cond.lcssa = phi i16 [ %cond, %BB4 ]
+/// br outer.header
+///
+/// Interchange then brings it in LCSSA form again resulting in this chain of
+/// single-input phi nodes:
+///
+/// BB4:
+/// %new.cond.lcssa = phi i16 [ %cond, %BB3 ]
+/// br label %BB5
+/// BB5:
+/// %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ]
+///
+/// The problem is that interchange can reorder blocks BB4 and BB5 placing the
+/// use before the def if we don't check this. The solution is to simplify
+/// lcssa phi nodes (remove) if they appear in non-exit blocks.
+///
+static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop) {
+ BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+
+ // Do not modify lcssa phis where they actually belong, i.e. in exit blocks.
+ if (OuterLoopLatch == InnerLoopExit)
+ return;
+
+ // Collect and remove phis in non-exit blocks if they have 1 input.
+ SmallVector<PHINode *, 8> Phis(
+ llvm::make_pointer_range(OuterLoopLatch->phis()));
+ for (PHINode *Phi : Phis) {
+ assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected");
+ LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi
+ << "\n");
+ Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
+ Phi->eraseFromParent();
+ }
+}
+
bool LoopInterchangeTransform::adjustLoopBranches() {
LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
std::vector<DominatorTree::UpdateType> DTUpdates;
@@ -1882,6 +1928,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
InnerLoopPreHeader && "Guaranteed by loop-simplify form");
+
+ simplifyLCSSAPhis(OuterLoop, InnerLoop);
+
// Ensure that both preheaders do not contain PHI nodes and have single
// predecessors. This allows us to move them easily. We use
// InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 7da8586..d827e64 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -8,7 +8,6 @@
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -217,9 +216,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
// Get the analysis results needed by loop passes.
MemorySSA *MSSA =
UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
- BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
- ? (&AM.getResult<BlockFrequencyAnalysis>(F))
- : nullptr;
LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -227,7 +223,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
AM.getResult<ScalarEvolutionAnalysis>(F),
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<TargetIRAnalysis>(F),
- BFI,
MSSA};
// Setup the loop analysis manager from its proxy. It is important that
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index ed4e2b1..3487e81 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -97,6 +97,12 @@ static cl::opt<MatrixLayoutTy> MatrixLayout(
static cl::opt<bool> PrintAfterTransposeOpt("matrix-print-after-transpose-opt",
cl::init(false));
+static cl::opt<unsigned> SplitMatmulRemainderOverThreshold(
+ "matrix-split-matmul-remainder-over-threshold", cl::Hidden,
+ cl::desc("Illegal remainder vectors over this size in bits should be split "
+ "in the inner loop of matmul"),
+ cl::init(0));
+
/// Helper function to either return Scope, if it is a subprogram or the
/// attached subprogram for a local scope.
static DISubprogram *getSubprogram(DIScope *Scope) {
@@ -1720,6 +1726,31 @@ public:
ToRemove.push_back(MatMul);
}
+ /// Given \p Remainder iterations of the matmul inner loop,
+ /// potentially lower \p BlockSize that is used for the underlying
+ /// vector.
+ unsigned capBlockSize(unsigned BlockSize, unsigned Remainder, Type *EltType) {
+ if (BlockSize <= Remainder)
+ return BlockSize;
+
+ // If the remainder is also a legal type just use it.
+ auto *VecTy = FixedVectorType::get(EltType, Remainder);
+ if (TTI.isTypeLegal(VecTy))
+ return Remainder;
+
+ // Similarly, if the vector is small enough that we don't want
+ // to split further.
+ if (VecTy->getPrimitiveSizeInBits() <= SplitMatmulRemainderOverThreshold)
+ return Remainder;
+
+ // Gradually lower the vectorization factor to cover the
+ // remainder.
+ do {
+ BlockSize /= 2;
+ } while (BlockSize > Remainder);
+ return BlockSize;
+ }
+
/// Compute \p Result += \p A * \p B for input matrices with left-associating
/// addition.
///
@@ -1757,10 +1788,8 @@ public:
bool isSumZero = isa<ConstantAggregateZero>(Result.getColumn(J));
for (unsigned I = 0; I < R; I += BlockSize) {
- // Gradually lower the vectorization factor to cover the remainder.
- while (I + BlockSize > R)
- BlockSize /= 2;
-
+ // Lower block size to make sure we stay within bounds.
+ BlockSize = capBlockSize(BlockSize, R - I, Result.getElementType());
Value *Sum = IsTiled ? Result.extractVector(I, J, BlockSize, Builder)
: nullptr;
for (unsigned K = 0; K < M; ++K) {
@@ -1785,9 +1814,8 @@ public:
unsigned BlockSize = VF;
bool isSumZero = isa<ConstantAggregateZero>(Result.getRow(I));
for (unsigned J = 0; J < C; J += BlockSize) {
- // Gradually lower the vectorization factor to cover the remainder.
- while (J + BlockSize > C)
- BlockSize /= 2;
+ // Lower the vectorization factor to cover the remainder.
+ BlockSize = capBlockSize(BlockSize, C - J, Result.getElementType());
Value *Sum = nullptr;
for (unsigned K = 0; K < M; ++K) {
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index 42d6680..146e7d1 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -111,7 +111,7 @@ static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
}
// Translate a masked load intrinsic like
-// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr,
// <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
@@ -146,11 +146,10 @@ static void scalarizeMaskedLoad(const DataLayout &DL, bool HasBranchDivergence,
CallInst *CI, DomTreeUpdater *DTU,
bool &ModifiedDT) {
Value *Ptr = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(1);
+ Value *Src0 = CI->getArgOperand(2);
- const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+ const Align AlignVal = CI->getParamAlign(0).valueOrOne();
VectorType *VecType = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecType->getElementType();
@@ -290,7 +289,7 @@ static void scalarizeMaskedLoad(const DataLayout &DL, bool HasBranchDivergence,
}
// Translate a masked store intrinsic, like
-// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr,
// <16 x i1> %mask)
// to a chain of basic blocks, that stores element one-by-one if
// the appropriate mask bit is set
@@ -320,10 +319,9 @@ static void scalarizeMaskedStore(const DataLayout &DL, bool HasBranchDivergence,
bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptr = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(2);
- const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+ const Align AlignVal = CI->getParamAlign(1).valueOrOne();
auto *VecType = cast<VectorType>(Src->getType());
Type *EltTy = VecType->getElementType();
@@ -472,9 +470,8 @@ static void scalarizeMaskedGather(const DataLayout &DL,
bool HasBranchDivergence, CallInst *CI,
DomTreeUpdater *DTU, bool &ModifiedDT) {
Value *Ptrs = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(1);
+ Value *Src0 = CI->getArgOperand(2);
auto *VecType = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecType->getElementType();
@@ -483,7 +480,7 @@ static void scalarizeMaskedGather(const DataLayout &DL,
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
- MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+ Align AlignVal = CI->getParamAlign(0).valueOrOne();
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
@@ -608,8 +605,7 @@ static void scalarizeMaskedScatter(const DataLayout &DL,
DomTreeUpdater *DTU, bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptrs = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(2);
auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
@@ -623,7 +619,7 @@ static void scalarizeMaskedScatter(const DataLayout &DL,
Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
- MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+ Align AlignVal = CI->getParamAlign(1).valueOrOne();
unsigned VectorWidth = SrcFVTy->getNumElements();
// Shorten the way if the mask is a vector of constants.
@@ -1125,8 +1121,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
case Intrinsic::masked_load:
// Scalarize unsupported vector masked load
if (TTI.isLegalMaskedLoad(
- CI->getType(),
- cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue(),
+ CI->getType(), CI->getParamAlign(0).valueOrOne(),
cast<PointerType>(CI->getArgOperand(0)->getType())
->getAddressSpace()))
return false;
@@ -1135,18 +1130,15 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
case Intrinsic::masked_store:
if (TTI.isLegalMaskedStore(
CI->getArgOperand(0)->getType(),
- cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue(),
+ CI->getParamAlign(1).valueOrOne(),
cast<PointerType>(CI->getArgOperand(1)->getType())
->getAddressSpace()))
return false;
scalarizeMaskedStore(DL, HasBranchDivergence, CI, DTU, ModifiedDT);
return true;
case Intrinsic::masked_gather: {
- MaybeAlign MA =
- cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
+ Align Alignment = CI->getParamAlign(0).valueOrOne();
Type *LoadTy = CI->getType();
- Align Alignment = DL.getValueOrABITypeAlignment(MA,
- LoadTy->getScalarType());
if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
!TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
return false;
@@ -1154,11 +1146,8 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
return true;
}
case Intrinsic::masked_scatter: {
- MaybeAlign MA =
- cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
+ Align Alignment = CI->getParamAlign(1).valueOrOne();
Type *StoreTy = CI->getArgOperand(0)->getType();
- Align Alignment = DL.getValueOrABITypeAlignment(MA,
- StoreTy->getScalarType());
if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
!TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
Alignment))
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index e4ba70d..5af6c96 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -3611,8 +3610,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC, AAResults &AA,
TargetTransformInfo &TTI, bool Trivial,
bool NonTrivial, ScalarEvolution *SE,
- MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
+ MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@@ -3652,35 +3650,6 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (F->hasOptSize())
return false;
- // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
- // of the loops L is nested in, and of the loops nested in L are all cold.
- auto IsLoopNestCold = [&](const Loop *L) {
- // Check L and all of its parent loops.
- auto *Parent = L;
- while (Parent) {
- if (!PSI->isColdBlock(Parent->getHeader(), BFI))
- return false;
- Parent = Parent->getParentLoop();
- }
- // Next check all loops nested within L.
- SmallVector<const Loop *, 4> Worklist;
- llvm::append_range(Worklist, L->getSubLoops());
- while (!Worklist.empty()) {
- auto *CurLoop = Worklist.pop_back_val();
- if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
- return false;
- llvm::append_range(Worklist, CurLoop->getSubLoops());
- }
- return true;
- };
-
- // Skip cold loops in cold loop nests, as unswitching them brings little
- // benefit but increases the code size
- if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
- LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
- return false;
- }
-
// Perform legality checks.
if (!isSafeForNoNTrivialUnswitching(L, LI))
return false;
@@ -3705,11 +3674,6 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &U) {
Function &F = *L.getHeader()->getParent();
(void)F;
- ProfileSummaryInfo *PSI = nullptr;
- if (auto OuterProxy =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
- .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
- PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
<< "\n");
@@ -3720,7 +3684,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
- &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
+ &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index fa66a03..23e1243 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -227,6 +227,7 @@ static InstructionCost ComputeSpeculationCost(const Instruction *I,
case Instruction::Call:
case Instruction::BitCast:
case Instruction::PtrToInt:
+ case Instruction::PtrToAddr:
case Instruction::IntToPtr:
case Instruction::AddrSpaceCast:
case Instruction::FPToUI: