aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachinePipeliner.cpp
diff options
context:
space:
mode:
authorRyotaro Kasuga <kasuga.ryotaro@fujitsu.com>2025-04-23 18:11:34 +0900
committerGitHub <noreply@github.com>2025-04-23 18:11:34 +0900
commit3cd6b86cc1e1fd1d8d62ca1bcb8498362a4f7b68 (patch)
tree6747e816fc0e80f6123c25bdb2e73fda3eb3ed7d /llvm/lib/CodeGen/MachinePipeliner.cpp
parenta1331704752c46cd4d954eb8682af230937fe5a6 (diff)
downloadllvm-3cd6b86cc1e1fd1d8d62ca1bcb8498362a4f7b68.zip
llvm-3cd6b86cc1e1fd1d8d62ca1bcb8498362a4f7b68.tar.gz
llvm-3cd6b86cc1e1fd1d8d62ca1bcb8498362a4f7b68.tar.bz2
[MachinePipeliner] Use AliasAnalysis properly when analyzing loop-carried dependencies (#136691)
MachinePipeliner uses AliasAnalysis to collect loop-carried memory dependencies. To analyze loop-carried dependencies, we need to explicitly tell AliasAnalysis that the values may come from different iterations. Before this patch, MachinePipeliner didn't do this, so some loop-carried dependencies might be missed. For example, in the following case, there is a loop-carried dependency from the load to the store, but it wasn't considered. ``` def @f(ptr noalias %p0, ptr noalias %p1) { entry: br label %body loop: %idx0 = phi ptr [ %p0, %entry ], [ %p1, %body ] %idx1 = phi ptr [ %p1, %entry ], [ %p0, %body ] %v0 = load %idx0 ... store %v1, %idx1 ... } ``` Further, the handling of the underlying objects was not sound. If there is no information about memory operands (i.e., `memoperands()` is empty), it must be handled conservatively. However, Machinepipeliner uses a dummy value (namely `UnknownValue`). It is distinguished from other "known" objects, causing necessary dependencies to be missed. (NOTE: in such cases, `buildSchedGraph` adds non-loop-carried dependencies correctly, so perhaps a critical problem has not occurred.) This patch fixes the above problems. This change has increased false dependencies that didn't exist before. Therefore, this patch also introduces additional alias checks with the underlying objects. Split off from #135148
Diffstat (limited to 'llvm/lib/CodeGen/MachinePipeliner.cpp')
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp225
1 files changed, 137 insertions, 88 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 6cb0299..07bffc6 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -237,6 +237,37 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
+namespace {
+
+/// This class holds an SUnit corresponding to a memory operation and other
+/// information related to the instruction.
+struct SUnitWithMemInfo {
+ SUnit *SU;
+ SmallVector<const Value *, 2> UnderlyingObjs;
+
+ /// The value of a memory operand.
+ const Value *MemOpValue = nullptr;
+
+ /// The offset of a memory operand.
+ int64_t MemOpOffset = 0;
+
+ AAMDNodes AATags;
+
+ /// True if all the underlying objects are identified.
+ bool IsAllIdentified = false;
+
+ SUnitWithMemInfo(SUnit *SU);
+
+ bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const;
+
+ bool isUnknown() const { return MemOpValue == nullptr; }
+
+private:
+ bool getUnderlyingObjects();
+};
+
+} // end anonymous namespace
+
/// The "main" function for implementing Swing Modulo Scheduling.
bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
if (skipFunction(mf.getFunction()))
@@ -470,9 +501,10 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SwingSchedulerDAG SMS(
*this, L, getAnalysis<LiveIntervalsWrapperPass>().getLIS(), RegClassInfo,
- II_setByPragma, LI.LoopPipelinerInfo.get());
+ II_setByPragma, LI.LoopPipelinerInfo.get(), AA);
MachineBasicBlock *MBB = L.getHeader();
// The kernel should not include any terminator instructions. These
@@ -560,9 +592,8 @@ void SwingSchedulerDAG::setMAX_II() {
/// We override the schedule function in ScheduleDAGInstrs to implement the
/// scheduling part of the Swing Modulo Scheduling algorithm.
void SwingSchedulerDAG::schedule() {
- AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
buildSchedGraph(AA);
- addLoopCarriedDependences(AA);
+ addLoopCarriedDependences();
updatePhiDependences();
Topo.InitDAGTopologicalSorting();
changeDependences();
@@ -810,113 +841,131 @@ static bool isDependenceBarrier(MachineInstr &MI) {
(!MI.mayLoad() || !MI.isDereferenceableInvariantLoad()));
}
-/// Return the underlying objects for the memory references of an instruction.
+SUnitWithMemInfo::SUnitWithMemInfo(SUnit *SU) : SU(SU) {
+ if (!getUnderlyingObjects())
+ return;
+ for (const Value *Obj : UnderlyingObjs)
+ if (!isIdentifiedObject(Obj)) {
+ IsAllIdentified = false;
+ break;
+ }
+}
+
+bool SUnitWithMemInfo::isTriviallyDisjoint(
+ const SUnitWithMemInfo &Other) const {
+ // If all underlying objects are identified objects and there is no overlap
+ // between them, then these two instructions are disjoint.
+ if (!IsAllIdentified || !Other.IsAllIdentified)
+ return false;
+ for (const Value *Obj : UnderlyingObjs)
+ if (llvm::is_contained(Other.UnderlyingObjs, Obj))
+ return false;
+ return true;
+}
+
+/// Collect the underlying objects for the memory references of an instruction.
/// This function calls the code in ValueTracking, but first checks that the
/// instruction has a memory operand.
-static void getUnderlyingObjects(const MachineInstr *MI,
- SmallVectorImpl<const Value *> &Objs) {
+/// Returns false if we cannot find the underlying objects.
+bool SUnitWithMemInfo::getUnderlyingObjects() {
+ const MachineInstr *MI = SU->getInstr();
if (!MI->hasOneMemOperand())
- return;
+ return false;
MachineMemOperand *MM = *MI->memoperands_begin();
if (!MM->getValue())
- return;
- getUnderlyingObjects(MM->getValue(), Objs);
- for (const Value *V : Objs) {
- if (!isIdentifiedObject(V)) {
- Objs.clear();
- return;
- }
- }
+ return false;
+ MemOpValue = MM->getValue();
+ MemOpOffset = MM->getOffset();
+ llvm::getUnderlyingObjects(MemOpValue, UnderlyingObjs);
+
+ // TODO: A no alias scope may be valid only in a single iteration. In this
+ // case we need to peel off it like LoopAccessAnalysis does.
+ AATags = MM->getAAInfo();
+ return true;
}
/// Add a chain edge between a load and store if the store can be an
/// alias of the load on a subsequent iteration, i.e., a loop carried
/// dependence. This code is very similar to the code in ScheduleDAGInstrs
/// but that code doesn't create loop carried dependences.
-void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
- MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
- Value *UnknownValue =
- UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
+void SwingSchedulerDAG::addLoopCarriedDependences() {
+ SmallVector<SUnitWithMemInfo, 4> PendingLoads;
for (auto &SU : SUnits) {
MachineInstr &MI = *SU.getInstr();
if (isDependenceBarrier(MI))
PendingLoads.clear();
else if (MI.mayLoad()) {
- SmallVector<const Value *, 4> Objs;
- ::getUnderlyingObjects(&MI, Objs);
- if (Objs.empty())
- Objs.push_back(UnknownValue);
- for (const auto *V : Objs) {
- SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
- SUs.push_back(&SU);
- }
+ PendingLoads.emplace_back(&SU);
} else if (MI.mayStore()) {
- SmallVector<const Value *, 4> Objs;
- ::getUnderlyingObjects(&MI, Objs);
- if (Objs.empty())
- Objs.push_back(UnknownValue);
- for (const auto *V : Objs) {
- MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
- PendingLoads.find(V);
- if (I == PendingLoads.end())
+ SUnitWithMemInfo Store(&SU);
+ for (const SUnitWithMemInfo &Load : PendingLoads) {
+ if (Load.isTriviallyDisjoint(Store))
continue;
- for (auto *Load : I->second) {
- if (isSuccOrder(Load, &SU))
- continue;
- MachineInstr &LdMI = *Load->getInstr();
- // First, perform the cheaper check that compares the base register.
- // If they are the same and the load offset is less than the store
- // offset, then mark the dependence as loop carried potentially.
- const MachineOperand *BaseOp1, *BaseOp2;
- int64_t Offset1, Offset2;
- bool Offset1IsScalable, Offset2IsScalable;
- if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
- Offset1IsScalable, TRI) &&
- TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
- Offset2IsScalable, TRI)) {
- if (BaseOp1->isIdenticalTo(*BaseOp2) &&
- Offset1IsScalable == Offset2IsScalable &&
- (int)Offset1 < (int)Offset2) {
- assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
- "What happened to the chain edge?");
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
- }
- }
- // Second, the more expensive check that uses alias analysis on the
- // base registers. If they alias, and the load offset is less than
- // the store offset, the mark the dependence as loop carried.
- if (!AA) {
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
- }
- MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
- MachineMemOperand *MMO2 = *MI.memoperands_begin();
- if (!MMO1->getValue() || !MMO2->getValue()) {
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
- }
- if (MMO1->getValue() == MMO2->getValue() &&
- MMO1->getOffset() <= MMO2->getOffset()) {
- SDep Dep(Load, SDep::Barrier);
+ if (isSuccOrder(Load.SU, Store.SU))
+ continue;
+ MachineInstr &LdMI = *Load.SU->getInstr();
+ // First, perform the cheaper check that compares the base register.
+ // If they are the same and the load offset is less than the store
+ // offset, then mark the dependence as loop carried potentially.
+ const MachineOperand *BaseOp1, *BaseOp2;
+ int64_t Offset1, Offset2;
+ bool Offset1IsScalable, Offset2IsScalable;
+ if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
+ Offset1IsScalable, TRI) &&
+ TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
+ Offset2IsScalable, TRI)) {
+ if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+ Offset1IsScalable == Offset2IsScalable &&
+ (int)Offset1 < (int)Offset2) {
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
+ "What happened to the chain edge?");
+ SDep Dep(Load.SU, SDep::Barrier);
Dep.setLatency(1);
SU.addPred(Dep);
continue;
}
- if (!AA->isNoAlias(
- MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
- MemoryLocation::getAfter(MMO2->getValue(),
- MMO2->getAAInfo()))) {
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- }
+ }
+ // Second, the more expensive check that uses alias analysis on the
+ // base registers. If they alias, and the load offset is less than
+ // the store offset, the mark the dependence as loop carried.
+ if (Load.isUnknown() || Store.isUnknown()) {
+ SDep Dep(Load.SU, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
+ if (Load.MemOpValue == Store.MemOpValue &&
+ Load.MemOpOffset <= Store.MemOpOffset) {
+ SDep Dep(Load.SU, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
+
+ bool IsNoAlias = [&] {
+ if (BAA.isNoAlias(MemoryLocation::getBeforeOrAfter(Load.MemOpValue,
+ Load.AATags),
+ MemoryLocation::getBeforeOrAfter(Store.MemOpValue,
+ Store.AATags)))
+ return true;
+
+ // AliasAnalysis sometimes gives up on following the underlying
+ // object. In such a case, separate checks for underlying objects may
+ // prove that there are no aliases between two accesses.
+ for (const Value *LoadObj : Load.UnderlyingObjs)
+ for (const Value *StoreObj : Store.UnderlyingObjs)
+ if (!BAA.isNoAlias(
+ MemoryLocation::getBeforeOrAfter(LoadObj, Load.AATags),
+ MemoryLocation::getBeforeOrAfter(StoreObj, Store.AATags)))
+ return false;
+
+ return true;
+ }();
+
+ if (!IsNoAlias) {
+ SDep Dep(Load.SU, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
}
}
}