diff options
author | David Penry <david.penry@arm.com> | 2022-03-29 10:13:55 -0700 |
---|---|---|
committer | David Penry <david.penry@arm.com> | 2022-04-28 13:01:18 -0700 |
commit | 28d09bbbc3d09c912b54a4d5edb32cab7de32a6f (patch) | |
tree | 9eaa368ad76da65ab8bf1b3b0f7297bece624887 /llvm/lib/CodeGen/MachinePipeliner.cpp | |
parent | 651d9f70ed75e360b0a166ddca40526c2df43fe3 (diff) | |
download | llvm-28d09bbbc3d09c912b54a4d5edb32cab7de32a6f.zip llvm-28d09bbbc3d09c912b54a4d5edb32cab7de32a6f.tar.gz llvm-28d09bbbc3d09c912b54a4d5edb32cab7de32a6f.tar.bz2 |
[CodeGen][ARM] Enable Swing Module Scheduling for ARM
This patch permits Swing Modulo Scheduling for ARM targets
turns it on by default for the Cortex-M7. The t2Bcc
instruction is recognized as a loop-ending branch.
MachinePipeliner is extended by adding support for
"unpipelineable" instructions. These instructions are
those which contribute to the loop exit test; in the SMS
papers they are removed before creating the dependence graph
and then inserted into the final schedule of the kernel and
prologues. Support for these instructions was not previously
necessary because current targets supporting SMS have only
supported it for hardware loop branches, which have no
loop-exit-contributing instructions in the loop body.
The current structure of the MachinePipeliner makes it difficult
to remove/exclude these instructions from the dependence graph.
Therefore, this patch leaves them in the graph, but adds a
"normalization" method which moves them in the schedule to
stage 0, which causes them to appear properly in kernel and
prologues.
It was also necessary to be more careful about boundary nodes
when iterating across successors in the dependence graph because
the loop exit branch is now a non-artificial successor to
instructions in the graph. In additional, schedules with physical
use/def pairs in the same cycle should be treated as creating an
invalid schedule because the scheduling logic doesn't respect
physical register dependence once scheduled to the same cycle.
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D122672
Diffstat (limited to 'llvm/lib/CodeGen/MachinePipeliner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachinePipeliner.cpp | 97 |
1 files changed, 89 insertions, 8 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 0bffa91..9ea6e9b 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -255,6 +255,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { << "Failed to pipeline loop"; }); + LI.LoopPipelinerInfo.reset(); return Changed; } @@ -262,6 +263,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { Changed = swingModuloScheduler(L); + LI.LoopPipelinerInfo.reset(); return Changed; } @@ -354,7 +356,8 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; - if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { + LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock()); + if (!LI.LoopPipelinerInfo) { LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n"); NumFailLoop++; ORE->emit([&]() { @@ -419,7 +422,7 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { assert(L.getBlocks().size() == 1 && "SMS works on single blocks only."); SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo, - II_setByPragma); + II_setByPragma, LI.LoopPipelinerInfo.get()); MachineBasicBlock *MBB = L.getHeader(); // The kernel should not include any terminator instructions. These @@ -1422,7 +1425,7 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { /// We ignore the back-edge recurrence in order to avoid unbounded recursion /// in the calculation of the ASAP, ALAP, etc functions. static bool ignoreDependence(const SDep &D, bool isPred) { - if (D.isArtificial()) + if (D.isArtificial() || D.getSUnit()->isBoundaryNode()) return true; return D.getKind() == SDep::Anti && isPred; } @@ -1471,6 +1474,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { SUnit *SU = &SUnits[I]; for (const SDep &S : SU->Succs) { SUnit *succ = S.getSUnit(); + if (succ->isBoundaryNode()) + continue; if (S.getLatency() == 0) zeroLatencyHeight = std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1); @@ -1788,7 +1793,8 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, NodesAdded.insert(SU); for (auto &SI : SU->Succs) { SUnit *Successor = SI.getSUnit(); - if (!SI.isArtificial() && NodesAdded.count(Successor) == 0) + if (!SI.isArtificial() && !Successor->isBoundaryNode() && + NodesAdded.count(Successor) == 0) addConnectedNodes(Successor, NewSet, NodesAdded); } for (auto &PI : SU->Preds) { @@ -2080,6 +2086,11 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { }); } while (++NI != NE && scheduleFound); + // If a schedule is found, ensure non-pipelined instructions are in stage 0 + if (scheduleFound) + scheduleFound = + Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo); + // If a schedule is found, check if it is a valid schedule too. if (scheduleFound) scheduleFound = Schedule.isValidSchedule(this); @@ -2263,7 +2274,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) { bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc) { if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) || - Dep.isArtificial()) + Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode()) return false; if (!SwpPruneLoopCarried) @@ -2430,7 +2441,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) { while (!Worklist.empty()) { const SDep &Cur = Worklist.pop_back_val(); SUnit *SuccSU = Cur.getSUnit(); - if (Visited.count(SuccSU)) + if (Visited.count(SuccSU) || SuccSU->isBoundaryNode()) continue; std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU); if (it == InstrToCycle.end()) @@ -2697,21 +2708,91 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, return false; } +/// Determine transitive dependences of unpipelineable instructions +SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes( + SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) { + SmallSet<SUnit *, 8> DoNotPipeline; + SmallVector<SUnit *, 8> Worklist; + + for (auto &SU : SSD->SUnits) + if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr())) + Worklist.push_back(&SU); + + while (!Worklist.empty()) { + auto SU = Worklist.pop_back_val(); + if (DoNotPipeline.count(SU)) + continue; + LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n"); + DoNotPipeline.insert(SU); + for (auto &Dep : SU->Preds) + Worklist.push_back(Dep.getSUnit()); + if (SU->getInstr()->isPHI()) + for (auto &Dep : SU->Succs) + if (Dep.getKind() == SDep::Anti) + Worklist.push_back(Dep.getSUnit()); + } + return DoNotPipeline; +} + +// Determine all instructions upon which any unpipelineable instruction depends +// and ensure that they are in stage 0. If unable to do so, return false. +bool SMSchedule::normalizeNonPipelinedInstructions( + SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) { + SmallSet<SUnit *, 8> DNP = computeUnpipelineableNodes(SSD, PLI); + + int NewLastCycle = INT_MIN; + for (SUnit &SU : SSD->SUnits) { + if (!SU.isInstr()) + continue; + if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) { + NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]); + continue; + } + + // Put the non-pipelined instruction as early as possible in the schedule + int NewCycle = getFirstCycle(); + for (auto &Dep : SU.Preds) + NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle); + + int OldCycle = InstrToCycle[&SU]; + if (OldCycle != NewCycle) { + InstrToCycle[&SU] = NewCycle; + auto &OldS = getInstructions(OldCycle); + OldS.erase(std::remove(OldS.begin(), OldS.end(), &SU), OldS.end()); + getInstructions(NewCycle).emplace_back(&SU); + LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum + << ") is not pipelined; moving from cycle " << OldCycle + << " to " << NewCycle << " Instr:" << *SU.getInstr()); + } + NewLastCycle = std::max(NewLastCycle, NewCycle); + } + LastCycle = NewLastCycle; + return true; +} + // Check if the generated schedule is valid. This function checks if // an instruction that uses a physical register is scheduled in a // different stage than the definition. The pipeliner does not handle // physical register values that may cross a basic block boundary. +// Furthermore, if a physical def/use pair is assigned to the same +// cycle, orderDependence does not guarantee def/use ordering, so that +// case should be considered invalid. (The test checks for both +// earlier and same-cycle use to be more robust.) bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { for (SUnit &SU : SSD->SUnits) { if (!SU.hasPhysRegDefs) continue; int StageDef = stageScheduled(&SU); + int CycleDef = InstrToCycle[&SU]; assert(StageDef != -1 && "Instruction should have been scheduled."); for (auto &SI : SU.Succs) - if (SI.isAssignedRegDep()) - if (Register::isPhysicalRegister(SI.getReg())) + if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode()) + if (Register::isPhysicalRegister(SI.getReg())) { if (stageScheduled(SI.getSUnit()) != StageDef) return false; + if (InstrToCycle[SI.getSUnit()] <= CycleDef) + return false; + } } return true; } |