aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachinePipeliner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MachinePipeliner.cpp')
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp225
1 files changed, 137 insertions, 88 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 6cb0299..07bffc6 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -237,6 +237,37 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
+namespace {
+
+/// This class holds an SUnit corresponding to a memory operation and other
+/// information related to the instruction.
+struct SUnitWithMemInfo {
+ SUnit *SU;
+ SmallVector<const Value *, 2> UnderlyingObjs;
+
+ /// The value of a memory operand.
+ const Value *MemOpValue = nullptr;
+
+ /// The offset of a memory operand.
+ int64_t MemOpOffset = 0;
+
+ AAMDNodes AATags;
+
+ /// True if all the underlying objects are identified.
+ bool IsAllIdentified = false;
+
+ SUnitWithMemInfo(SUnit *SU);
+
+ bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const;
+
+ bool isUnknown() const { return MemOpValue == nullptr; }
+
+private:
+ bool getUnderlyingObjects();
+};
+
+} // end anonymous namespace
+
/// The "main" function for implementing Swing Modulo Scheduling.
bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
if (skipFunction(mf.getFunction()))
@@ -470,9 +501,10 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SwingSchedulerDAG SMS(
*this, L, getAnalysis<LiveIntervalsWrapperPass>().getLIS(), RegClassInfo,
- II_setByPragma, LI.LoopPipelinerInfo.get());
+ II_setByPragma, LI.LoopPipelinerInfo.get(), AA);
MachineBasicBlock *MBB = L.getHeader();
// The kernel should not include any terminator instructions. These
@@ -560,9 +592,8 @@ void SwingSchedulerDAG::setMAX_II() {
/// We override the schedule function in ScheduleDAGInstrs to implement the
/// scheduling part of the Swing Modulo Scheduling algorithm.
void SwingSchedulerDAG::schedule() {
- AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
buildSchedGraph(AA);
- addLoopCarriedDependences(AA);
+ addLoopCarriedDependences();
updatePhiDependences();
Topo.InitDAGTopologicalSorting();
changeDependences();
@@ -810,113 +841,131 @@ static bool isDependenceBarrier(MachineInstr &MI) {
(!MI.mayLoad() || !MI.isDereferenceableInvariantLoad()));
}
-/// Return the underlying objects for the memory references of an instruction.
+SUnitWithMemInfo::SUnitWithMemInfo(SUnit *SU) : SU(SU) {
+ if (!getUnderlyingObjects())
+ return;
+ for (const Value *Obj : UnderlyingObjs)
+ if (!isIdentifiedObject(Obj)) {
+ IsAllIdentified = false;
+ break;
+ }
+}
+
+bool SUnitWithMemInfo::isTriviallyDisjoint(
+ const SUnitWithMemInfo &Other) const {
+ // If all underlying objects are identified objects and there is no overlap
+ // between them, then these two instructions are disjoint.
+ if (!IsAllIdentified || !Other.IsAllIdentified)
+ return false;
+ for (const Value *Obj : UnderlyingObjs)
+ if (llvm::is_contained(Other.UnderlyingObjs, Obj))
+ return false;
+ return true;
+}
+
+/// Collect the underlying objects for the memory references of an instruction.
/// This function calls the code in ValueTracking, but first checks that the
/// instruction has a memory operand.
-static void getUnderlyingObjects(const MachineInstr *MI,
- SmallVectorImpl<const Value *> &Objs) {
+/// Returns false if we cannot find the underlying objects.
+bool SUnitWithMemInfo::getUnderlyingObjects() {
+ const MachineInstr *MI = SU->getInstr();
if (!MI->hasOneMemOperand())
- return;
+ return false;
MachineMemOperand *MM = *MI->memoperands_begin();
if (!MM->getValue())
- return;
- getUnderlyingObjects(MM->getValue(), Objs);
- for (const Value *V : Objs) {
- if (!isIdentifiedObject(V)) {
- Objs.clear();
- return;
- }
- }
+ return false;
+ MemOpValue = MM->getValue();
+ MemOpOffset = MM->getOffset();
+ llvm::getUnderlyingObjects(MemOpValue, UnderlyingObjs);
+
+ // TODO: A no alias scope may be valid only in a single iteration. In this
+ // case we need to peel off it like LoopAccessAnalysis does.
+ AATags = MM->getAAInfo();
+ return true;
}
/// Add a chain edge between a load and store if the store can be an
/// alias of the load on a subsequent iteration, i.e., a loop carried
/// dependence. This code is very similar to the code in ScheduleDAGInstrs
/// but that code doesn't create loop carried dependences.
-void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
- MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
- Value *UnknownValue =
- UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
+void SwingSchedulerDAG::addLoopCarriedDependences() {
+ SmallVector<SUnitWithMemInfo, 4> PendingLoads;
for (auto &SU : SUnits) {
MachineInstr &MI = *SU.getInstr();
if (isDependenceBarrier(MI))
PendingLoads.clear();
else if (MI.mayLoad()) {
- SmallVector<const Value *, 4> Objs;
- ::getUnderlyingObjects(&MI, Objs);
- if (Objs.empty())
- Objs.push_back(UnknownValue);
- for (const auto *V : Objs) {
- SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
- SUs.push_back(&SU);
- }
+ PendingLoads.emplace_back(&SU);
} else if (MI.mayStore()) {
- SmallVector<const Value *, 4> Objs;
- ::getUnderlyingObjects(&MI, Objs);
- if (Objs.empty())
- Objs.push_back(UnknownValue);
- for (const auto *V : Objs) {
- MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
- PendingLoads.find(V);
- if (I == PendingLoads.end())
+ SUnitWithMemInfo Store(&SU);
+ for (const SUnitWithMemInfo &Load : PendingLoads) {
+ if (Load.isTriviallyDisjoint(Store))
continue;
- for (auto *Load : I->second) {
- if (isSuccOrder(Load, &SU))
- continue;
- MachineInstr &LdMI = *Load->getInstr();
- // First, perform the cheaper check that compares the base register.
- // If they are the same and the load offset is less than the store
- // offset, then mark the dependence as loop carried potentially.
- const MachineOperand *BaseOp1, *BaseOp2;
- int64_t Offset1, Offset2;
- bool Offset1IsScalable, Offset2IsScalable;
- if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
- Offset1IsScalable, TRI) &&
- TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
- Offset2IsScalable, TRI)) {
- if (BaseOp1->isIdenticalTo(*BaseOp2) &&
- Offset1IsScalable == Offset2IsScalable &&
- (int)Offset1 < (int)Offset2) {
- assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
- "What happened to the chain edge?");
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
- }
- }
- // Second, the more expensive check that uses alias analysis on the
- // base registers. If they alias, and the load offset is less than
- // the store offset, the mark the dependence as loop carried.
- if (!AA) {
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
- }
- MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
- MachineMemOperand *MMO2 = *MI.memoperands_begin();
- if (!MMO1->getValue() || !MMO2->getValue()) {
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
- }
- if (MMO1->getValue() == MMO2->getValue() &&
- MMO1->getOffset() <= MMO2->getOffset()) {
- SDep Dep(Load, SDep::Barrier);
+ if (isSuccOrder(Load.SU, Store.SU))
+ continue;
+ MachineInstr &LdMI = *Load.SU->getInstr();
+ // First, perform the cheaper check that compares the base register.
+ // If they are the same and the load offset is less than the store
+ // offset, then mark the dependence as loop carried potentially.
+ const MachineOperand *BaseOp1, *BaseOp2;
+ int64_t Offset1, Offset2;
+ bool Offset1IsScalable, Offset2IsScalable;
+ if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
+ Offset1IsScalable, TRI) &&
+ TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
+ Offset2IsScalable, TRI)) {
+ if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+ Offset1IsScalable == Offset2IsScalable &&
+ (int)Offset1 < (int)Offset2) {
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
+ "What happened to the chain edge?");
+ SDep Dep(Load.SU, SDep::Barrier);
Dep.setLatency(1);
SU.addPred(Dep);
continue;
}
- if (!AA->isNoAlias(
- MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
- MemoryLocation::getAfter(MMO2->getValue(),
- MMO2->getAAInfo()))) {
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- }
+ }
+ // Second, the more expensive check that uses alias analysis on the
+ // base registers. If they alias, and the load offset is less than
+ // the store offset, the mark the dependence as loop carried.
+ if (Load.isUnknown() || Store.isUnknown()) {
+ SDep Dep(Load.SU, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
+ if (Load.MemOpValue == Store.MemOpValue &&
+ Load.MemOpOffset <= Store.MemOpOffset) {
+ SDep Dep(Load.SU, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
+
+ bool IsNoAlias = [&] {
+ if (BAA.isNoAlias(MemoryLocation::getBeforeOrAfter(Load.MemOpValue,
+ Load.AATags),
+ MemoryLocation::getBeforeOrAfter(Store.MemOpValue,
+ Store.AATags)))
+ return true;
+
+ // AliasAnalysis sometimes gives up on following the underlying
+ // object. In such a case, separate checks for underlying objects may
+ // prove that there are no aliases between two accesses.
+ for (const Value *LoadObj : Load.UnderlyingObjs)
+ for (const Value *StoreObj : Store.UnderlyingObjs)
+ if (!BAA.isNoAlias(
+ MemoryLocation::getBeforeOrAfter(LoadObj, Load.AATags),
+ MemoryLocation::getBeforeOrAfter(StoreObj, Store.AATags)))
+ return false;
+
+ return true;
+ }();
+
+ if (!IsNoAlias) {
+ SDep Dep(Load.SU, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
}
}
}