Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--  llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp  |  3
-rw-r--r--  llvm/lib/Transforms/Scalar/GVNSink.cpp                 |  2
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopFuse.cpp                | 16
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp     | 16
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp          |  3
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp   | 30
-rw-r--r--  llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp      | 19
-rw-r--r--  llvm/lib/Transforms/Scalar/StructurizeCFG.cpp          | 23
8 files changed, 72 insertions, 40 deletions
diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
index 89980d5..a577f51 100644
--- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
+++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
@@ -122,7 +122,8 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) {
Value *Cond = Assume->getArgOperand(0);
// Don't drop type tests, which have special semantics.
- if (match(Cond, m_Intrinsic<Intrinsic::type_test>()))
+ if (match(Cond, m_Intrinsic<Intrinsic::type_test>()) ||
+ match(Cond, m_Intrinsic<Intrinsic::public_type_test>()))
continue;
SmallVector<Value *> Affected;
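
Context for the hunk above: llvm.public.type.test carries the same whole-program CFI semantics as llvm.type.test, so an assume wrapping either must survive even when the condition has no other use. A minimal standalone sketch of the guard's logic (the enum and helper are hypothetical models, not LLVM API):

#include <cassert>

// Hypothetical model of the updated guard: assumes wrapping either type-test
// intrinsic carry special semantics and must never be dropped.
enum class CondKind { TypeTest, PublicTypeTest, Other };

static bool mustKeepAssume(CondKind K) {
  return K == CondKind::TypeTest || K == CondKind::PublicTypeTest;
}

int main() {
  assert(mustKeepAssume(CondKind::PublicTypeTest)); // newly covered case
  assert(mustKeepAssume(CondKind::TypeTest));
  assert(!mustKeepAssume(CondKind::Other)); // ordinary assumes may be dropped
  return 0;
}

The two match() calls in the patch could likely also be folded into one via m_CombineOr from PatternMatch.h; the split form keeps the diff minimal.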
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index a06f832..d564e32 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -514,7 +514,7 @@ public:
class GVNSink {
public:
- GVNSink() {}
+ GVNSink() = default;
bool run(Function &F) {
LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 19eccb9..9ffa602 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1796,14 +1796,16 @@ private:
// mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);
- // Forget block dispositions as well, so that there are no dangling
- // pointers to erased/free'ed blocks.
- SE.forgetBlockAndLoopDispositions();
// Move instructions from FC0.Latch to FC1.Latch.
// Note: mergeLatch requires an updated DT.
mergeLatch(FC0, FC1);
+ // Forget block dispositions as well, so that there are no dangling
+ // pointers to erased/free'ed blocks. It should be done after mergeLatch()
+ // since merging the latches may affect the dispositions.
+ SE.forgetBlockAndLoopDispositions();
+
// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
for (BasicBlock *BB : Blocks) {
@@ -2092,14 +2094,16 @@ private:
// mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);
- // Forget block dispositions as well, so that there are no dangling
- // pointers to erased/free'ed blocks.
- SE.forgetBlockAndLoopDispositions();
// Move instructions from FC0.Latch to FC1.Latch.
// Note: mergeLatch requires an updated DT.
mergeLatch(FC0, FC1);
+ // Forget block dispositions as well, so that there are no dangling
+ // pointers to erased/free'ed blocks. It should be done after mergeLatch()
+ // since merging the latches may affect the dispositions.
+ SE.forgetBlockAndLoopDispositions();
+
// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
for (BasicBlock *BB : Blocks) {
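
A toy model (all names hypothetical, not LLVM API) of the ordering constraint the moved comment describes: cached dispositions must be dropped only after the CFG mutation, because the mutation itself may touch or repopulate entries that would then dangle.

#include <cassert>
#include <set>

struct ScalarEvolutionModel {
  std::set<int> Dispositions; // keyed by a block id in this toy model
  void forgetBlockAndLoopDispositions() { Dispositions.clear(); }
};

static void mergeLatchModel(ScalarEvolutionModel &SE, int LatchId) {
  SE.Dispositions.insert(LatchId); // merging may touch cached state
}

int main() {
  ScalarEvolutionModel SE;
  mergeLatchModel(SE, 42);             // mutate the CFG first, as the fix does...
  SE.forgetBlockAndLoopDispositions(); // ...then invalidate the cache
  assert(SE.Dispositions.empty());     // no stale entries survive
  return 0;
}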
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index a883998..1b770be 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -89,8 +89,8 @@ struct StoreToLoadForwardingCandidate {
/// Return true if the dependence from the store to the load has an
/// absolute distance of one.
/// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
- bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
- Loop *L) const {
+ bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE, Loop *L,
+ const DominatorTree &DT) const {
Value *LoadPtr = Load->getPointerOperand();
Value *StorePtr = Store->getPointerOperand();
Type *LoadType = getLoadStoreType(Load);
@@ -102,8 +102,10 @@ struct StoreToLoadForwardingCandidate {
DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
"Should be a known dependence");
- int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
- int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
+ int64_t StrideLoad =
+ getPtrStride(PSE, LoadType, LoadPtr, L, DT).value_or(0);
+ int64_t StrideStore =
+ getPtrStride(PSE, LoadType, StorePtr, L, DT).value_or(0);
if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
return false;
@@ -287,8 +289,8 @@ public:
// so deciding which one forwards is easy. The later one forwards as
// long as they both have a dependence distance of one to the load.
if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
- Cand.isDependenceDistanceOfOne(PSE, L) &&
- OtherCand->isDependenceDistanceOfOne(PSE, L)) {
+ Cand.isDependenceDistanceOfOne(PSE, L, *DT) &&
+ OtherCand->isDependenceDistanceOfOne(PSE, L, *DT)) {
// They are in the same block, the later one will forward to the load.
if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
OtherCand = &Cand;
@@ -538,7 +540,7 @@ public:
// Check whether the SCEV difference is the same as the induction step,
// thus we load the value in the next iteration.
- if (!Cand.isDependenceDistanceOfOne(PSE, L))
+ if (!Cand.isDependenceDistanceOfOne(PSE, L, *DT))
continue;
assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
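
To see what a dependence distance of one buys, here is a plain C++ sketch (not LLVM code) of the rewrite this pass performs on A[i+1] = A[i]-style loops: the value stored in iteration i is exactly the value loaded in iteration i+1, so it can be carried in a scalar instead of reloaded.

#include <cassert>

int main() {
  int A[8] = {7, 0, 0, 0, 0, 0, 0, 0};
  int B[8] = {7, 0, 0, 0, 0, 0, 0, 0};

  // Original form: the load A[i] in iteration i+1 reads the store A[i+1]
  // made in iteration i -- an absolute dependence distance of one.
  for (int i = 0; i < 7; ++i)
    A[i + 1] = A[i] + 1;

  // After store-to-load forwarding: the stored value is carried in a
  // register across iterations and the load disappears.
  int Carried = B[0];
  for (int i = 0; i < 7; ++i) {
    Carried = Carried + 1;
    B[i + 1] = Carried;
  }

  for (int i = 0; i < 8; ++i)
    assert(A[i] == B[i]); // both loops compute 7, 8, ..., 14
  return 0;
}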
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 2bda9d8..802ae4e 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1327,7 +1327,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
}
// Do not attempt partial/runtime unrolling in FullLoopUnrolling
- if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) {
+ if (OnlyFullUnroll && ((!TripCount && !MaxTripCount) ||
+ UP.Count < TripCount || UP.Count < MaxTripCount)) {
LLVM_DEBUG(
dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
return LoopUnrollResult::Unmodified;
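
The guard can be checked in isolation. A standalone sketch (the helper name is made up) of why the extra clause matters: when neither trip count is known, both values are zero, the two < comparisons are vacuously false, and the pass would previously have fallen through to partial/runtime unrolling despite OnlyFullUnroll.

#include <cassert>

// Hypothetical helper mirroring the fixed condition. TripCount and
// MaxTripCount are zero when unknown, matching the pass's convention.
static bool bailOutOfFullUnrollOnly(unsigned Count, unsigned TripCount,
                                    unsigned MaxTripCount) {
  return (!TripCount && !MaxTripCount) || Count < TripCount ||
         Count < MaxTripCount;
}

int main() {
  assert(bailOutOfFullUnrollOnly(4, 0, 0));     // unknown trip count: bail
  assert(bailOutOfFullUnrollOnly(4, 16, 16));   // would be partial: bail
  assert(!bailOutOfFullUnrollOnly(16, 16, 16)); // full unroll: proceed
  return 0;
}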
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 3487e81..7e70ba2 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -245,11 +245,14 @@ raw_ostream &operator<<(raw_ostream &OS, ShapeInfo SI) {
} // namespace
-static bool isUniformShape(Value *V) {
+static bool isShapePreserving(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return true;
+ if (isa<SelectInst>(I))
+ return true;
+
if (I->isBinaryOp())
return true;
@@ -300,6 +303,16 @@ static bool isUniformShape(Value *V) {
}
}
+/// Return the range of operands of \p I that should share shape
+/// information with \p I.
+static iterator_range<Use *> getShapedOperandsForInst(Instruction *I) {
+ assert(isShapePreserving(I) &&
+ "Can't retrieve shaped operands for an instruction that does not "
+ "preserve shape information");
+ auto Ops = I->operands();
+ return isa<SelectInst>(I) ? drop_begin(Ops) : Ops;
+}
+
/// Return the ShapeInfo for the result of \p I, if it can be determined.
static std::optional<ShapeInfo>
computeShapeInfoForInst(Instruction *I,
@@ -329,9 +342,8 @@ computeShapeInfoForInst(Instruction *I,
return OpShape->second;
}
- if (isUniformShape(I) || isa<SelectInst>(I)) {
- auto Ops = I->operands();
- auto ShapedOps = isa<SelectInst>(I) ? drop_begin(Ops) : Ops;
+ if (isShapePreserving(I)) {
+ auto ShapedOps = getShapedOperandsForInst(I);
// Find the first operand that has a known shape and use that.
for (auto &Op : ShapedOps) {
auto OpShape = ShapeMap.find(Op.get());
@@ -710,10 +722,9 @@ public:
case Intrinsic::matrix_column_major_store:
return true;
default:
- return isUniformShape(II);
+ break;
}
- return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V) ||
- isa<SelectInst>(V);
+ return isShapePreserving(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
}
/// Propagate the shape information of instructions to their users.
@@ -800,9 +811,8 @@ public:
} else if (isa<StoreInst>(V)) {
// Nothing to do. We forward-propagated to this so we would just
// backward propagate to an instruction with an already known shape.
- } else if (isUniformShape(V) || isa<SelectInst>(V)) {
- auto Ops = cast<Instruction>(V)->operands();
- auto ShapedOps = isa<SelectInst>(V) ? drop_begin(Ops) : Ops;
+ } else if (isShapePreserving(V)) {
+ auto ShapedOps = getShapedOperandsForInst(cast<Instruction>(V));
// Propagate to all operands.
ShapeInfo Shape = ShapeMap[V];
for (Use &U : ShapedOps) {
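
The helper's one subtlety is that a select's condition (operand 0) carries no matrix shape, so only the two value operands participate in propagation. A plain C++20 sketch of that slicing (names hypothetical, not the pass's API):

#include <cassert>
#include <span>

// Mirrors getShapedOperandsForInst in miniature: for a select, drop the
// condition operand; every other shape-preserving instruction propagates
// shape through all of its operands.
static std::span<const char *const>
shapedOperands(bool IsSelect, std::span<const char *const> Ops) {
  return IsSelect ? Ops.subspan(1) : Ops;
}

int main() {
  const char *SelectOps[] = {"%cond", "%lhs", "%rhs"};
  const char *AddOps[] = {"%lhs", "%rhs"};
  assert(shapedOperands(true, SelectOps).size() == 2); // %cond skipped
  assert(shapedOperands(false, AddOps).size() == 2);   // all operands kept
  return 0;
}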
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index bb6c879..239526e 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -337,7 +337,7 @@ static void buildPartialUnswitchConditionalBranch(
static void buildPartialInvariantUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
ValueToValueMapTy VMap;
for (auto *Val : reverse(ToDuplicate)) {
Instruction *Inst = cast<Instruction>(Val);
@@ -377,8 +377,19 @@ static void buildPartialInvariantUnswitchConditionalBranch(
IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
Value *Cond = VMap[ToDuplicate[0]];
- IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
- Direction ? &NormalSucc : &UnswitchedSucc);
+ // The expectation is that ToDuplicate[0] is the condition used by the
+ // OriginalBranch, in which case we can clone the profile metadata from there.
+ auto *ProfData =
+ !ProfcheckDisableMetadataFixes &&
+ ToDuplicate[0] == skipTrivialSelect(OriginalBranch.getCondition())
+ ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+ : nullptr;
+ auto *BR =
+ IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+ if (!ProfData)
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(),
+ DEBUG_TYPE);
}
/// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2515,7 +2526,7 @@ static void unswitchNontrivialInvariants(
// the branch in the split block.
if (PartiallyInvariant)
buildPartialInvariantUnswitchConditionalBranch(
- *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
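
A toy model (types and names invented for illustration) of the decision the new code makes: the original branch's weights are cloned only when the duplicated condition is exactly the original branch condition; otherwise the new branch is explicitly marked as having unknown weights rather than silently carrying none.

#include <cassert>
#include <optional>

struct BranchWeights { unsigned TrueWeight, FalseWeight; };

// Reuse the original weights only when the duplicated condition is the
// original branch condition; otherwise report "explicitly unknown".
static std::optional<BranchWeights>
weightsForClone(bool CondIsOriginal, std::optional<BranchWeights> Original) {
  return CondIsOriginal ? Original : std::nullopt;
}

int main() {
  std::optional<BranchWeights> Orig = BranchWeights{90, 10};
  assert(weightsForClone(true, Orig).has_value());   // clone !prof verbatim
  assert(!weightsForClone(false, Orig).has_value()); // mark unknown instead
  return 0;
}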
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 0f3978f..0a8f5ea 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -143,8 +143,8 @@ struct SubGraphTraits {
class WrappedSuccIterator
: public iterator_adaptor_base<
WrappedSuccIterator, BaseSuccIterator,
- typename std::iterator_traits<BaseSuccIterator>::iterator_category,
- NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
+ std::iterator_traits<BaseSuccIterator>::iterator_category, NodeRef,
+ std::ptrdiff_t, NodeRef *, NodeRef> {
SmallDenseSet<RegionNode *> *Nodes;
public:
@@ -558,11 +558,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
} else {
// Test for successors as back edge
BasicBlock *BB = N->getNodeAs<BasicBlock>();
- BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-
- for (BasicBlock *Succ : Term->successors())
- if (Visited.count(Succ))
- Loops[Succ] = BB;
+ if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()))
+ for (BasicBlock *Succ : Term->successors())
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
}
}
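
The crux of this hunk is swapping cast<BranchInst>, which asserts the cast succeeds, for dyn_cast<BranchInst>, which returns null so non-branch terminators (such as callbr) are simply skipped. A plain C++ analogue using dynamic_cast (class names invented for illustration):

#include <cassert>

struct Terminator { virtual ~Terminator() = default; };
struct BranchTerm : Terminator {};
struct CallBrTerm : Terminator {};

// Guarded downcast: non-branch terminators yield nullptr and are skipped,
// where an unconditional cast<> would have tripped an assertion.
static int countBackEdgeCandidates(const Terminator &T) {
  if (dynamic_cast<const BranchTerm *>(&T))
    return 1; // the real pass records Loops[Succ] = BB here
  return 0;
}

int main() {
  BranchTerm Branch;
  CallBrTerm CallBr;
  assert(countBackEdgeCandidates(Branch) == 1);
  assert(countBackEdgeCandidates(CallBr) == 0); // callbr is ignored
  return 0;
}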
@@ -594,7 +593,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
for (BasicBlock *P : predecessors(BB)) {
// Ignore it if it's a branch from outside into our region entry
- if (!ParentRegion->contains(P))
+ if (!ParentRegion->contains(P) || !isa<BranchInst>(P->getTerminator()))
continue;
Region *R = RI->getRegionFor(P);
@@ -1402,13 +1401,17 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
/// Run the transformation for each region found
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
const TargetTransformInfo *TTI) {
- if (R->isTopLevelRegion())
+ // CallBr and its corresponding direct target blocks are for now ignored by
+ // this pass. This is not a limitation for the currently intended use cases
+ // of callbr in the AMDGPU backend.
+ // Parent and child regions are not affected by this (current) restriction.
+ // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
+ if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
return false;
this->DT = DT;
this->TTI = TTI;
Func = R->getEntry()->getParent();
- assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
ParentRegion = R;