aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachinePipeliner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MachinePipeliner.cpp')
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp248
1 files changed, 219 insertions, 29 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 5a9b21d..d2c79f6 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -265,6 +265,82 @@ private:
bool getUnderlyingObjects();
};
+/// Add loop-carried chain dependencies. This class handles the same type of
+/// dependencies added by `ScheduleDAGInstrs::buildSchedGraph`, but takes into
+/// account dependencies across iterations.
+class LoopCarriedOrderDepsTracker {
+ // Type of instruction that is relevant to order-dependencies
+ enum class InstrTag {
+ Barrier = 0, ///< A barrier event instruction.
+ LoadOrStore = 1, ///< An instruction that may load or store memory, but is
+ ///< not a barrier event.
+ FPExceptions = 2, ///< An instruction that does not match above, but may
+ ///< raise floatin-point exceptions.
+ };
+
+ struct TaggedSUnit : PointerIntPair<SUnit *, 2> {
+ TaggedSUnit(SUnit *SU, InstrTag Tag)
+ : PointerIntPair<SUnit *, 2>(SU, unsigned(Tag)) {}
+
+ InstrTag getTag() const { return InstrTag(getInt()); }
+ };
+
+ /// Holds loads and stores with memory related information.
+ struct LoadStoreChunk {
+ SmallVector<SUnitWithMemInfo, 4> Loads;
+ SmallVector<SUnitWithMemInfo, 4> Stores;
+
+ void append(SUnit *SU);
+ };
+
+ SwingSchedulerDAG *DAG;
+ BatchAAResults *BAA;
+ std::vector<SUnit> &SUnits;
+
+ /// The size of SUnits, for convenience.
+ const unsigned N;
+
+ /// Loop-carried Edges.
+ std::vector<BitVector> LoopCarried;
+
+ /// Instructions related to chain dependencies. They are one of the
+ /// following:
+ ///
+ /// 1. Barrier event.
+ /// 2. Load, but neither a barrier event, invariant load, nor may load trap
+ /// value.
+ /// 3. Store, but not a barrier event.
+ /// 4. None of them, but may raise floating-point exceptions.
+ ///
+ /// This is used when analyzing loop-carried dependencies that access global
+ /// barrier instructions.
+ std::vector<TaggedSUnit> TaggedSUnits;
+
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+
+public:
+ LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI);
+
+ /// The main function to compute loop-carried order-dependencies.
+ void computeDependencies();
+
+ const BitVector &getLoopCarried(unsigned Idx) const {
+ return LoopCarried[Idx];
+ }
+
+private:
+ /// Tags to \p SU if the instruction may affect the order-dependencies.
+ std::optional<InstrTag> getInstrTag(SUnit *SU) const;
+
+ void addLoopCarriedDepenenciesForChunks(const LoadStoreChunk &From,
+ const LoadStoreChunk &To);
+
+ void computeDependenciesAux();
+};
+
} // end anonymous namespace
/// The "main" function for implementing Swing Modulo Scheduling.
@@ -592,13 +668,19 @@ void SwingSchedulerDAG::setMAX_II() {
/// scheduling part of the Swing Modulo Scheduling algorithm.
void SwingSchedulerDAG::schedule() {
buildSchedGraph(AA);
- addLoopCarriedDependences();
+ const LoopCarriedEdges LCE = addLoopCarriedDependences();
updatePhiDependences();
Topo.InitDAGTopologicalSorting();
changeDependences();
postProcessDAG();
DDG = std::make_unique<SwingSchedulerDDG>(SUnits, &EntrySU, &ExitSU);
- LLVM_DEBUG(dump());
+ LLVM_DEBUG({
+ dump();
+ dbgs() << "===== Loop Carried Edges Begin =====\n";
+ for (SUnit &SU : SUnits)
+ LCE.dump(&SU, TRI, &MRI);
+ dbgs() << "===== Loop Carried Edges End =====\n";
+ });
NodeSetType NodeSets;
findCircuits(NodeSets);
@@ -831,15 +913,6 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
return false;
}
-/// Return true if the instruction causes a chain between memory
-/// references before and after it.
-static bool isDependenceBarrier(MachineInstr &MI) {
- return MI.isCall() || MI.mayRaiseFPException() ||
- MI.hasUnmodeledSideEffects() ||
- (MI.hasOrderedMemoryRef() &&
- (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad()));
-}
-
SUnitWithMemInfo::SUnitWithMemInfo(SUnit *SU) : SU(SU) {
if (!getUnderlyingObjects())
return;
@@ -940,28 +1013,111 @@ static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src,
return false;
}
+void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {
+ const MachineInstr *MI = SU->getInstr();
+ if (!MI->mayLoadOrStore())
+ return;
+ (MI->mayStore() ? Stores : Loads).emplace_back(SU);
+}
+
+LoopCarriedOrderDepsTracker::LoopCarriedOrderDepsTracker(
+ SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI)
+ : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.size()),
+ LoopCarried(N, BitVector(N)), TII(TII), TRI(TRI) {}
+
+void LoopCarriedOrderDepsTracker::computeDependencies() {
+ // Traverse all instructions and extract only what we are targetting.
+ for (auto &SU : SUnits) {
+ auto Tagged = getInstrTag(&SU);
+
+ // This instruction has no loop-carried order-dependencies.
+ if (!Tagged)
+ continue;
+ TaggedSUnits.emplace_back(&SU, *Tagged);
+ }
+
+ computeDependenciesAux();
+}
+
+std::optional<LoopCarriedOrderDepsTracker::InstrTag>
+LoopCarriedOrderDepsTracker::getInstrTag(SUnit *SU) const {
+ MachineInstr *MI = SU->getInstr();
+ if (TII->isGlobalMemoryObject(MI))
+ return InstrTag::Barrier;
+
+ if (MI->mayStore() ||
+ (MI->mayLoad() && !MI->isDereferenceableInvariantLoad()))
+ return InstrTag::LoadOrStore;
+
+ if (MI->mayRaiseFPException())
+ return InstrTag::FPExceptions;
+
+ return std::nullopt;
+}
+
+void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
+ const LoadStoreChunk &From, const LoadStoreChunk &To) {
+ // Add dependencies for load-to-store (WAR) from top to bottom.
+ for (const SUnitWithMemInfo &Src : From.Loads)
+ for (const SUnitWithMemInfo &Dst : To.Stores)
+ if (Src.SU->NodeNum < Dst.SU->NodeNum &&
+ hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI))
+ LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
+
+ // TODO: The following dependencies are missed.
+ //
+ // - Dependencies for load-to-store from bottom to top.
+ // - Dependencies for store-to-load (RAW).
+ // - Dependencies for store-to-store (WAW).
+}
+
+void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
+ SmallVector<LoadStoreChunk, 2> Chunks(1);
+ for (const auto &TSU : TaggedSUnits) {
+ InstrTag Tag = TSU.getTag();
+ SUnit *SU = TSU.getPointer();
+ switch (Tag) {
+ case InstrTag::Barrier:
+ Chunks.emplace_back();
+ break;
+ case InstrTag::LoadOrStore:
+ Chunks.back().append(SU);
+ break;
+ case InstrTag::FPExceptions:
+ // TODO: Handle this properly.
+ break;
+ }
+ }
+
+ // Add dependencies between memory operations. If there are one or more
+ // barrier events between two memory instructions, we don't add a
+ // loop-carried dependence for them.
+ for (const LoadStoreChunk &Chunk : Chunks)
+ addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
+
+ // TODO: If there are multiple barrier instructions, dependencies from the
+ // last barrier instruction (or load/store below it) to the first barrier
+ // instruction (or load/store above it).
+}
+
/// Add a chain edge between a load and store if the store can be an
/// alias of the load on a subsequent iteration, i.e., a loop carried
/// dependence. This code is very similar to the code in ScheduleDAGInstrs
/// but that code doesn't create loop carried dependences.
-void SwingSchedulerDAG::addLoopCarriedDependences() {
- SmallVector<SUnitWithMemInfo, 4> PendingLoads;
- for (auto &SU : SUnits) {
- MachineInstr &MI = *SU.getInstr();
- if (isDependenceBarrier(MI))
- PendingLoads.clear();
- else if (MI.mayLoad()) {
- PendingLoads.emplace_back(&SU);
- } else if (MI.mayStore()) {
- SUnitWithMemInfo Store(&SU);
- for (const SUnitWithMemInfo &Load : PendingLoads)
- if (hasLoopCarriedMemDep(Load, Store, BAA, TII, TRI)) {
- SDep Dep(Load.SU, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- }
- }
- }
+/// TODO: Also compute output-dependencies.
+LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
+ LoopCarriedEdges LCE;
+
+ // Add loop-carried order-dependencies
+ LoopCarriedOrderDepsTracker LCODTracker(this, &BAA, TII, TRI);
+ LCODTracker.computeDependencies();
+ for (unsigned I = 0; I != SUnits.size(); I++)
+ for (const int Succ : LCODTracker.getLoopCarried(I).set_bits())
+ LCE.OrderDeps[&SUnits[I]].insert(&SUnits[Succ]);
+
+ LCE.modifySUnits(SUnits);
+ return LCE;
}
/// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
@@ -4001,3 +4157,37 @@ const SwingSchedulerDDG::EdgesType &
SwingSchedulerDDG::getOutEdges(const SUnit *SU) const {
return getEdges(SU).Succs;
}
+
+void LoopCarriedEdges::modifySUnits(std::vector<SUnit> &SUnits) {
+ // Currently this function simply adds all dependencies represented by this
+ // object. After we properly handle missed dependencies, the logic here will
+ // be more complex, as currently missed edges should not be added to the DAG.
+ for (SUnit &SU : SUnits) {
+ SUnit *Src = &SU;
+ if (auto *OrderDep = getOrderDepOrNull(Src)) {
+ SDep Dep(Src, SDep::Barrier);
+ Dep.setLatency(1);
+ for (SUnit *Dst : *OrderDep)
+ Dst->addPred(Dep);
+ }
+ }
+}
+
+void LoopCarriedEdges::dump(SUnit *SU, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) const {
+ const auto *Order = getOrderDepOrNull(SU);
+
+ if (!Order)
+ return;
+
+ const auto DumpSU = [](const SUnit *SU) {
+ std::ostringstream OSS;
+ OSS << "SU(" << SU->NodeNum << ")";
+ return OSS.str();
+ };
+
+ dbgs() << " Loop carried edges from " << DumpSU(SU) << "\n"
+ << " Order\n";
+ for (SUnit *Dst : *Order)
+ dbgs() << " " << DumpSU(Dst) << "\n";
+}