diff options
Diffstat (limited to 'llvm/lib/CodeGen/MachinePipeliner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachinePipeliner.cpp | 532 |
1 files changed, 300 insertions, 232 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index b7d03a1..acd42aa 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -567,6 +567,7 @@ void SwingSchedulerDAG::schedule() { Topo.InitDAGTopologicalSorting(); changeDependences(); postProcessDAG(); + DDG = std::make_unique<SwingSchedulerDDG>(SUnits, &EntrySU, &ExitSU); LLVM_DEBUG(dump()); NodeSetType NodeSets; @@ -1583,29 +1584,6 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) { return RecMII; } -/// Swap all the anti dependences in the DAG. That means it is no longer a DAG, -/// but we do this to find the circuits, and then change them back. -static void swapAntiDependences(std::vector<SUnit> &SUnits) { - SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded; - for (SUnit &SU : SUnits) { - for (SDep &Pred : SU.Preds) - if (Pred.getKind() == SDep::Anti) - DepsAdded.push_back(std::make_pair(&SU, Pred)); - } - for (std::pair<SUnit *, SDep> &P : DepsAdded) { - // Remove this anti dependency and add one in the reverse direction. - SUnit *SU = P.first; - SDep &D = P.second; - SUnit *TargetSU = D.getSUnit(); - unsigned Reg = D.getReg(); - unsigned Lat = D.getLatency(); - SU->removePred(D); - SDep Dep(SU, SDep::Anti, Reg); - Dep.setLatency(Lat); - TargetSU->addPred(Dep); - } -} - /// Create the adjacency structure of the nodes in the graph. void SwingSchedulerDAG::Circuits::createAdjacencyStructure( SwingSchedulerDAG *DAG) { @@ -1614,11 +1592,11 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( for (int i = 0, e = SUnits.size(); i != e; ++i) { Added.reset(); // Add any successor to the adjacency matrix and exclude duplicates. - for (auto &SI : SUnits[i].Succs) { + for (auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) { // Only create a back-edge on the first and last nodes of a dependence // chain. This records any chains and adds them later. - if (SI.getKind() == SDep::Output) { - int N = SI.getSUnit()->NodeNum; + if (OE.isOutputDep()) { + int N = OE.getDst()->NodeNum; int BackEdge = i; auto Dep = OutputDeps.find(BackEdge); if (Dep != OutputDeps.end()) { @@ -1628,11 +1606,19 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( OutputDeps[N] = BackEdge; } // Do not process a boundary node, an artificial node. - // A back-edge is processed only if it goes to a Phi. - if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() || - (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI())) + if (OE.getDst()->isBoundaryNode() || OE.isArtificial()) + continue; + + // This code is retained o preserve previous behavior and prevent + // regression. This condition means that anti-dependnecies within an + // iteration are ignored when searching circuits. Therefore it's natural + // to consider this dependence as well. + // FIXME: Remove this code if it doesn't have significant impact on + // performance. + if (OE.isAntiDep()) continue; - int N = SI.getSUnit()->NodeNum; + + int N = OE.getDst()->NodeNum; if (!Added.test(N)) { AdjK[i].push_back(N); Added.set(N); @@ -1640,12 +1626,13 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( } // A chain edge between a store and a load is treated as a back-edge in the // adjacency matrix. - for (auto &PI : SUnits[i].Preds) { - if (!SUnits[i].getInstr()->mayStore() || - !DAG->isLoopCarriedDep(&SUnits[i], PI, false)) + for (auto &IE : DAG->DDG->getInEdges(&SUnits[i])) { + SUnit *Src = IE.getSrc(); + SUnit *Dst = IE.getDst(); + if (!Dst->getInstr()->mayStore() || !DAG->isLoopCarriedDep(IE)) continue; - if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) { - int N = PI.getSUnit()->NodeNum; + if (IE.isOrderDep() && Src->getInstr()->mayLoad()) { + int N = Src->NodeNum; if (!Added.test(N)) { AdjK[i].push_back(N); Added.set(N); @@ -1720,10 +1707,6 @@ void SwingSchedulerDAG::Circuits::unblock(int U) { /// Identify all the elementary circuits in the dependence graph using /// Johnson's circuit algorithm. void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { - // Swap all the anti dependences in the DAG. That means it is no longer a DAG, - // but we do this to find the circuits, and then change them back. - swapAntiDependences(SUnits); - Circuits Cir(SUnits, Topo); // Create the adjacency structure. Cir.createAdjacencyStructure(this); @@ -1731,9 +1714,6 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { Cir.reset(); Cir.circuit(I, I, NodeSets, this); } - - // Change the dependences back so that we've created a DAG again. - swapAntiDependences(SUnits); } // Create artificial dependencies between the source of COPY/REG_SEQUENCE that @@ -1816,15 +1796,6 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { } } -/// Return true for DAG nodes that we ignore when computing the cost functions. -/// We ignore the back-edge recurrence in order to avoid unbounded recursion -/// in the calculation of the ASAP, ALAP, etc functions. -static bool ignoreDependence(const SDep &D, bool isPred) { - if (D.isArtificial() || D.getSUnit()->isBoundaryNode()) - return true; - return D.getKind() == SDep::Anti && isPred; -} - /// Compute several functions need to order the nodes for scheduling. /// ASAP - Earliest time to schedule a node. /// ALAP - Latest time to schedule a node. @@ -1847,15 +1818,15 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { int asap = 0; int zeroLatencyDepth = 0; SUnit *SU = &SUnits[I]; - for (const SDep &P : SU->Preds) { - SUnit *pred = P.getSUnit(); - if (P.getLatency() == 0) + for (const auto &IE : DDG->getInEdges(SU)) { + SUnit *Pred = IE.getSrc(); + if (IE.getLatency() == 0) zeroLatencyDepth = - std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1); - if (ignoreDependence(P, true)) + std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1); + if (IE.ignoreDependence(true)) continue; - asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() - - getDistance(pred, SU, P) * MII)); + asap = std::max(asap, (int)(getASAP(Pred) + IE.getLatency() - + IE.getDistance() * MII)); } maxASAP = std::max(maxASAP, asap); ScheduleInfo[I].ASAP = asap; @@ -1867,17 +1838,17 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { int alap = maxASAP; int zeroLatencyHeight = 0; SUnit *SU = &SUnits[I]; - for (const SDep &S : SU->Succs) { - SUnit *succ = S.getSUnit(); - if (succ->isBoundaryNode()) + for (const auto &OE : DDG->getOutEdges(SU)) { + SUnit *Succ = OE.getDst(); + if (Succ->isBoundaryNode()) continue; - if (S.getLatency() == 0) + if (OE.getLatency() == 0) zeroLatencyHeight = - std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1); - if (ignoreDependence(S, true)) + std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1); + if (OE.ignoreDependence(true)) continue; - alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() + - getDistance(SU, succ, S) * MII)); + alap = std::min(alap, (int)(getALAP(Succ) - OE.getLatency() + + OE.getDistance() * MII)); } ScheduleInfo[I].ALAP = alap; @@ -1906,26 +1877,33 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { /// as the predecessors of the elements of NodeOrder that are not also in /// NodeOrder. static bool pred_L(SetVector<SUnit *> &NodeOrder, - SmallSetVector<SUnit *, 8> &Preds, + SmallSetVector<SUnit *, 8> &Preds, SwingSchedulerDDG *DDG, const NodeSet *S = nullptr) { Preds.clear(); - for (const SUnit *SU : NodeOrder) { - for (const SDep &Pred : SU->Preds) { - if (S && S->count(Pred.getSUnit()) == 0) + + for (SUnit *SU : NodeOrder) { + for (const auto &IE : DDG->getInEdges(SU)) { + SUnit *PredSU = IE.getSrc(); + if (S && S->count(PredSU) == 0) continue; - if (ignoreDependence(Pred, true)) + if (IE.ignoreDependence(true)) continue; - if (NodeOrder.count(Pred.getSUnit()) == 0) - Preds.insert(Pred.getSUnit()); + if (NodeOrder.count(PredSU) == 0) + Preds.insert(PredSU); } - // Back-edges are predecessors with an anti-dependence. - for (const SDep &Succ : SU->Succs) { - if (Succ.getKind() != SDep::Anti) + + // FIXME: The following loop-carried dependencies may also need to be + // considered. + // - Physical register dependencies (true-dependence and WAW). + // - Memory dependencies. + for (const auto &OE : DDG->getOutEdges(SU)) { + SUnit *SuccSU = OE.getDst(); + if (!OE.isAntiDep()) continue; - if (S && S->count(Succ.getSUnit()) == 0) + if (S && S->count(SuccSU) == 0) continue; - if (NodeOrder.count(Succ.getSUnit()) == 0) - Preds.insert(Succ.getSUnit()); + if (NodeOrder.count(SuccSU) == 0) + Preds.insert(SuccSU); } } return !Preds.empty(); @@ -1935,25 +1913,33 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder, /// as the successors of the elements of NodeOrder that are not also in /// NodeOrder. static bool succ_L(SetVector<SUnit *> &NodeOrder, - SmallSetVector<SUnit *, 8> &Succs, + SmallSetVector<SUnit *, 8> &Succs, SwingSchedulerDDG *DDG, const NodeSet *S = nullptr) { Succs.clear(); - for (const SUnit *SU : NodeOrder) { - for (const SDep &Succ : SU->Succs) { - if (S && S->count(Succ.getSUnit()) == 0) + + for (SUnit *SU : NodeOrder) { + for (const auto &OE : DDG->getOutEdges(SU)) { + SUnit *SuccSU = OE.getDst(); + if (S && S->count(SuccSU) == 0) continue; - if (ignoreDependence(Succ, false)) + if (OE.ignoreDependence(false)) continue; - if (NodeOrder.count(Succ.getSUnit()) == 0) - Succs.insert(Succ.getSUnit()); + if (NodeOrder.count(SuccSU) == 0) + Succs.insert(SuccSU); } - for (const SDep &Pred : SU->Preds) { - if (Pred.getKind() != SDep::Anti) + + // FIXME: The following loop-carried dependencies may also need to be + // considered. + // - Physical register dependnecies (true-dependnece and WAW). + // - Memory dependencies. + for (const auto &IE : DDG->getInEdges(SU)) { + SUnit *PredSU = IE.getSrc(); + if (!IE.isAntiDep()) continue; - if (S && S->count(Pred.getSUnit()) == 0) + if (S && S->count(PredSU) == 0) continue; - if (NodeOrder.count(Pred.getSUnit()) == 0) - Succs.insert(Pred.getSUnit()); + if (NodeOrder.count(PredSU) == 0) + Succs.insert(PredSU); } } return !Succs.empty(); @@ -1964,7 +1950,8 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder, static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path, SetVector<SUnit *> &DestNodes, SetVector<SUnit *> &Exclude, - SmallPtrSet<SUnit *, 8> &Visited) { + SmallPtrSet<SUnit *, 8> &Visited, + SwingSchedulerDDG *DDG) { if (Cur->isBoundaryNode()) return false; if (Exclude.contains(Cur)) @@ -1974,14 +1961,14 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path, if (!Visited.insert(Cur).second) return Path.contains(Cur); bool FoundPath = false; - for (auto &SI : Cur->Succs) - if (!ignoreDependence(SI, false)) + for (const auto &OE : DDG->getOutEdges(Cur)) + if (!OE.ignoreDependence(false)) FoundPath |= - computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited); - for (auto &PI : Cur->Preds) - if (PI.getKind() == SDep::Anti) + computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG); + for (const auto &IE : DDG->getInEdges(Cur)) + if (IE.isAntiDep() && IE.getDistance() == 0) FoundPath |= - computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited); + computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG); if (FoundPath) Path.insert(Cur); return FoundPath; @@ -2078,14 +2065,14 @@ void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) { for (int i = 0, e = NodeSets.size(); i < e; ++i) { NodeSet &N1 = NodeSets[i]; SmallSetVector<SUnit *, 8> S1; - if (N1.empty() || !succ_L(N1, S1)) + if (N1.empty() || !succ_L(N1, S1, DDG.get())) continue; for (int j = i + 1; j < e; ++j) { NodeSet &N2 = NodeSets[j]; if (N1.compareRecMII(N2) != 0) continue; SmallSetVector<SUnit *, 8> S2; - if (N2.empty() || !succ_L(N2, S2)) + if (N2.empty() || !succ_L(N2, S2, DDG.get())) continue; if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) { N1.setColocate(++Colocate); @@ -2126,22 +2113,22 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { for (NodeSet &I : NodeSets) { SmallSetVector<SUnit *, 8> N; // Add the nodes from the current node set to the previous node set. - if (succ_L(I, N)) { + if (succ_L(I, N, DDG.get())) { SetVector<SUnit *> Path; for (SUnit *NI : N) { Visited.clear(); - computePath(NI, Path, NodesAdded, I, Visited); + computePath(NI, Path, NodesAdded, I, Visited, DDG.get()); } if (!Path.empty()) I.insert(Path.begin(), Path.end()); } // Add the nodes from the previous node set to the current node set. N.clear(); - if (succ_L(NodesAdded, N)) { + if (succ_L(NodesAdded, N, DDG.get())) { SetVector<SUnit *> Path; for (SUnit *NI : N) { Visited.clear(); - computePath(NI, Path, I, NodesAdded, Visited); + computePath(NI, Path, I, NodesAdded, Visited, DDG.get()); } if (!Path.empty()) I.insert(Path.begin(), Path.end()); @@ -2153,7 +2140,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { // in a recurrent set. NodeSet NewSet; SmallSetVector<SUnit *, 8> N; - if (succ_L(NodesAdded, N)) + if (succ_L(NodesAdded, N, DDG.get())) for (SUnit *I : N) addConnectedNodes(I, NewSet, NodesAdded); if (!NewSet.empty()) @@ -2162,7 +2149,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { // Create a new node set with the connected nodes of any predecessor of a node // in a recurrent set. NewSet.clear(); - if (pred_L(NodesAdded, N)) + if (pred_L(NodesAdded, N, DDG.get())) for (SUnit *I : N) addConnectedNodes(I, NewSet, NodesAdded); if (!NewSet.empty()) @@ -2185,15 +2172,15 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, SetVector<SUnit *> &NodesAdded) { NewSet.insert(SU); NodesAdded.insert(SU); - for (auto &SI : SU->Succs) { - SUnit *Successor = SI.getSUnit(); - if (!SI.isArtificial() && !Successor->isBoundaryNode() && + for (auto &OE : DDG->getOutEdges(SU)) { + SUnit *Successor = OE.getDst(); + if (!OE.isArtificial() && !Successor->isBoundaryNode() && NodesAdded.count(Successor) == 0) addConnectedNodes(Successor, NewSet, NodesAdded); } - for (auto &PI : SU->Preds) { - SUnit *Predecessor = PI.getSUnit(); - if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0) + for (auto &IE : DDG->getInEdges(SU)) { + SUnit *Predecessor = IE.getSrc(); + if (!IE.isArtificial() && NodesAdded.count(Predecessor) == 0) addConnectedNodes(Predecessor, NewSet, NodesAdded); } } @@ -2259,11 +2246,12 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n"); OrderKind Order; SmallSetVector<SUnit *, 8> N; - if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) { + if (pred_L(NodeOrder, N, DDG.get()) && llvm::set_is_subset(N, Nodes)) { R.insert(N.begin(), N.end()); Order = BottomUp; LLVM_DEBUG(dbgs() << " Bottom up (preds) "); - } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) { + } else if (succ_L(NodeOrder, N, DDG.get()) && + llvm::set_is_subset(N, Nodes)) { R.insert(N.begin(), N.end()); Order = TopDown; LLVM_DEBUG(dbgs() << " Top down (succs) "); @@ -2313,30 +2301,36 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { NodeOrder.insert(maxHeight); LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " "); R.remove(maxHeight); - for (const auto &I : maxHeight->Succs) { - if (Nodes.count(I.getSUnit()) == 0) + for (const auto &OE : DDG->getOutEdges(maxHeight)) { + SUnit *SU = OE.getDst(); + if (Nodes.count(SU) == 0) continue; - if (NodeOrder.contains(I.getSUnit())) + if (NodeOrder.contains(SU)) continue; - if (ignoreDependence(I, false)) + if (OE.ignoreDependence(false)) continue; - R.insert(I.getSUnit()); + R.insert(SU); } - // Back-edges are predecessors with an anti-dependence. - for (const auto &I : maxHeight->Preds) { - if (I.getKind() != SDep::Anti) + + // FIXME: The following loop-carried dependencies may also need to be + // considered. + // - Physical register dependnecies (true-dependnece and WAW). + // - Memory dependencies. + for (const auto &IE : DDG->getInEdges(maxHeight)) { + SUnit *SU = IE.getSrc(); + if (!IE.isAntiDep()) continue; - if (Nodes.count(I.getSUnit()) == 0) + if (Nodes.count(SU) == 0) continue; - if (NodeOrder.contains(I.getSUnit())) + if (NodeOrder.contains(SU)) continue; - R.insert(I.getSUnit()); + R.insert(SU); } } Order = BottomUp; LLVM_DEBUG(dbgs() << "\n Switching order to bottom up "); SmallSetVector<SUnit *, 8> N; - if (pred_L(NodeOrder, N, &Nodes)) + if (pred_L(NodeOrder, N, DDG.get(), &Nodes)) R.insert(N.begin(), N.end()); } else { // Choose the node with the maximum depth. If more than one, choose @@ -2364,28 +2358,34 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { R.insert(Nodes.getNode(0)); break; } - for (const auto &I : maxDepth->Preds) { - if (Nodes.count(I.getSUnit()) == 0) + for (const auto &IE : DDG->getInEdges(maxDepth)) { + SUnit *SU = IE.getSrc(); + if (Nodes.count(SU) == 0) continue; - if (NodeOrder.contains(I.getSUnit())) + if (NodeOrder.contains(SU)) continue; - R.insert(I.getSUnit()); + R.insert(SU); } - // Back-edges are predecessors with an anti-dependence. - for (const auto &I : maxDepth->Succs) { - if (I.getKind() != SDep::Anti) + + // FIXME: The following loop-carried dependencies may also need to be + // considered. + // - Physical register dependnecies (true-dependnece and WAW). + // - Memory dependencies. + for (const auto &OE : DDG->getOutEdges(maxDepth)) { + SUnit *SU = OE.getDst(); + if (!OE.isAntiDep()) continue; - if (Nodes.count(I.getSUnit()) == 0) + if (Nodes.count(SU) == 0) continue; - if (NodeOrder.contains(I.getSUnit())) + if (NodeOrder.contains(SU)) continue; - R.insert(I.getSUnit()); + R.insert(SU); } } Order = TopDown; LLVM_DEBUG(dbgs() << "\n Switching order to top down "); SmallSetVector<SUnit *, 8> N; - if (succ_L(NodeOrder, N, &Nodes)) + if (succ_L(NodeOrder, N, DDG.get(), &Nodes)) R.insert(N.begin(), N.end()); } } @@ -2458,7 +2458,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { // loop-carried output/order dependencies. Empirically, there are also // cases where scheduling becomes possible with backward search. if (SU->getInstr()->isPHI() || - Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this)) + Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this->getDDG())) scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II); else scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II); @@ -2678,22 +2678,20 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) { /// Return true for an order or output dependence that is loop carried /// potentially. A dependence is loop carried if the destination defines a value /// that may be used or defined by the source in a subsequent iteration. -bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, - bool isSucc) const { - if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) || - Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode()) +bool SwingSchedulerDAG::isLoopCarriedDep( + const SwingSchedulerDDGEdge &Edge) const { + if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() || + Edge.getDst()->isBoundaryNode()) return false; if (!SwpPruneLoopCarried) return true; - if (Dep.getKind() == SDep::Output) + if (Edge.isOutputDep()) return true; - MachineInstr *SI = Source->getInstr(); - MachineInstr *DI = Dep.getSUnit()->getInstr(); - if (!isSucc) - std::swap(SI, DI); + MachineInstr *SI = Edge.getSrc()->getInstr(); + MachineInstr *DI = Edge.getDst()->getInstr(); assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI."); // Assume ordered loads and stores may have a loop carried dependence. @@ -2815,46 +2813,48 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { } // Return the cycle of the earliest scheduled instruction in the chain. -int SMSchedule::earliestCycleInChain(const SDep &Dep) { +int SMSchedule::earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, + const SwingSchedulerDDG *DDG) { SmallPtrSet<SUnit *, 8> Visited; - SmallVector<SDep, 8> Worklist; + SmallVector<SwingSchedulerDDGEdge, 8> Worklist; Worklist.push_back(Dep); int EarlyCycle = INT_MAX; while (!Worklist.empty()) { - const SDep &Cur = Worklist.pop_back_val(); - SUnit *PrevSU = Cur.getSUnit(); + const SwingSchedulerDDGEdge &Cur = Worklist.pop_back_val(); + SUnit *PrevSU = Cur.getSrc(); if (Visited.count(PrevSU)) continue; std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU); if (it == InstrToCycle.end()) continue; EarlyCycle = std::min(EarlyCycle, it->second); - for (const auto &PI : PrevSU->Preds) - if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output) - Worklist.push_back(PI); + for (const auto &IE : DDG->getInEdges(PrevSU)) + if (IE.isOrderDep() || IE.isOutputDep()) + Worklist.push_back(IE); Visited.insert(PrevSU); } return EarlyCycle; } // Return the cycle of the latest scheduled instruction in the chain. -int SMSchedule::latestCycleInChain(const SDep &Dep) { +int SMSchedule::latestCycleInChain(const SwingSchedulerDDGEdge &Dep, + const SwingSchedulerDDG *DDG) { SmallPtrSet<SUnit *, 8> Visited; - SmallVector<SDep, 8> Worklist; + SmallVector<SwingSchedulerDDGEdge, 8> Worklist; Worklist.push_back(Dep); int LateCycle = INT_MIN; while (!Worklist.empty()) { - const SDep &Cur = Worklist.pop_back_val(); - SUnit *SuccSU = Cur.getSUnit(); + const SwingSchedulerDDGEdge &Cur = Worklist.pop_back_val(); + SUnit *SuccSU = Cur.getDst(); if (Visited.count(SuccSU) || SuccSU->isBoundaryNode()) continue; std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU); if (it == InstrToCycle.end()) continue; LateCycle = std::max(LateCycle, it->second); - for (const auto &SI : SuccSU->Succs) - if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output) - Worklist.push_back(SI); + for (const auto &OE : DDG->getOutEdges(SuccSU)) + if (OE.isOrderDep() || OE.isOutputDep()) + Worklist.push_back(OE); Visited.insert(SuccSU); } return LateCycle; @@ -2865,7 +2865,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) { /// to a Phi, which contains a reference to another Phi. static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) { for (auto &P : SU->Preds) - if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI()) + if (P.getKind() == SDep::Anti && P.getSUnit()->getInstr()->isPHI()) for (auto &S : P.getSUnit()->Succs) if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI()) return P.getSUnit(); @@ -2876,57 +2876,47 @@ static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) { /// depends on any predecessor or successor nodes scheduled already. void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG) { + const SwingSchedulerDDG *DDG = DAG->getDDG(); + // Iterate over each instruction that has been scheduled already. The start // slot computation depends on whether the previously scheduled instruction // is a predecessor or successor of the specified instruction. for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) { - - // Iterate over each instruction in the current cycle. for (SUnit *I : getInstructions(cycle)) { - // Because we're processing a DAG for the dependences, we recognize - // the back-edge in recurrences by anti dependences. - for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) { - const SDep &Dep = SU->Preds[i]; - if (Dep.getSUnit() == I) { - if (!DAG->isBackedge(SU, Dep)) { - int EarlyStart = cycle + Dep.getLatency() - - DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; - *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); - if (DAG->isLoopCarriedDep(SU, Dep, false)) { - int End = earliestCycleInChain(Dep) + (II - 1); - *MinLateStart = std::min(*MinLateStart, End); - } - } else { - int LateStart = cycle - Dep.getLatency() + - DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; - *MinLateStart = std::min(*MinLateStart, LateStart); + for (const auto &IE : DDG->getInEdges(SU)) { + if (IE.getSrc() == I) { + // FIXME: Add reverse edge to `DDG` instead of calling + // `isLoopCarriedDep` + if (DAG->isLoopCarriedDep(IE)) { + int End = earliestCycleInChain(IE, DDG) + (II - 1); + *MinLateStart = std::min(*MinLateStart, End); } + int EarlyStart = cycle + IE.getLatency() - IE.getDistance() * II; + *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); } + } + + for (const auto &OE : DDG->getOutEdges(SU)) { + if (OE.getDst() == I) { + // FIXME: Add reverse edge to `DDG` instead of calling + // `isLoopCarriedDep` + if (DAG->isLoopCarriedDep(OE)) { + int Start = latestCycleInChain(OE, DDG) + 1 - II; + *MaxEarlyStart = std::max(*MaxEarlyStart, Start); + } + int LateStart = cycle - OE.getLatency() + OE.getDistance() * II; + *MinLateStart = std::min(*MinLateStart, LateStart); + } + } + + SUnit *BE = multipleIterations(I, DAG); + for (const auto &Dep : SU->Preds) { // For instruction that requires multiple iterations, make sure that // the dependent instruction is not scheduled past the definition. - SUnit *BE = multipleIterations(I, DAG); if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() && !SU->isPred(I)) *MinLateStart = std::min(*MinLateStart, cycle); } - for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) { - if (SU->Succs[i].getSUnit() == I) { - const SDep &Dep = SU->Succs[i]; - if (!DAG->isBackedge(SU, Dep)) { - int LateStart = cycle - Dep.getLatency() + - DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; - *MinLateStart = std::min(*MinLateStart, LateStart); - if (DAG->isLoopCarriedDep(SU, Dep)) { - int Start = latestCycleInChain(Dep) + 1 - II; - *MaxEarlyStart = std::max(*MaxEarlyStart, Start); - } - } else { - int EarlyStart = cycle + Dep.getLatency() - - DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; - *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); - } - } - } } } } @@ -2943,6 +2933,7 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, unsigned MoveDef = 0; unsigned MoveUse = 0; int StageInst1 = stageScheduled(SU); + const SwingSchedulerDDG *DDG = SSD->getDDG(); unsigned Pos = 0; for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E; @@ -3000,10 +2991,10 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, } // Check for order dependences between instructions. Make sure the source // is ordered before the destination. - for (auto &S : SU->Succs) { - if (S.getSUnit() != *I) + for (auto &OE : DDG->getOutEdges(SU)) { + if (OE.getDst() != *I) continue; - if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) { + if (OE.isOrderDep() && stageScheduled(*I) == StageInst1) { OrderBeforeUse = true; if (Pos < MoveUse) MoveUse = Pos; @@ -3011,18 +3002,17 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, // We did not handle HW dependences in previous for loop, // and we normally set Latency = 0 for Anti/Output deps, // so may have nodes in same cycle with Anti/Output dependent on HW regs. - else if ((S.getKind() == SDep::Anti || S.getKind() == SDep::Output) && + else if ((OE.isAntiDep() || OE.isOutputDep()) && stageScheduled(*I) == StageInst1) { OrderBeforeUse = true; if ((MoveUse == 0) || (Pos < MoveUse)) MoveUse = Pos; } } - for (auto &P : SU->Preds) { - if (P.getSUnit() != *I) + for (auto &IE : DDG->getInEdges(SU)) { + if (IE.getSrc() != *I) continue; - if ((P.getKind() == SDep::Order || P.getKind() == SDep::Anti || - P.getKind() == SDep::Output) && + if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) && stageScheduled(*I) == StageInst1) { OrderAfterDef = true; MoveDef = Pos; @@ -3117,12 +3107,9 @@ bool SMSchedule::isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, /// Return true if all scheduled predecessors are loop-carried output/order /// dependencies. bool SMSchedule::onlyHasLoopCarriedOutputOrOrderPreds( - SUnit *SU, SwingSchedulerDAG *DAG) const { - for (const SDep &Pred : SU->Preds) - if (InstrToCycle.count(Pred.getSUnit()) && !DAG->isBackedge(SU, Pred)) - return false; - for (const SDep &Succ : SU->Succs) - if (InstrToCycle.count(Succ.getSUnit()) && DAG->isBackedge(SU, Succ)) + SUnit *SU, const SwingSchedulerDDG *DDG) const { + for (const auto &IE : DDG->getInEdges(SU)) + if (InstrToCycle.count(IE.getSrc())) return false; return true; } @@ -3137,18 +3124,21 @@ SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes( if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr())) Worklist.push_back(&SU); + const SwingSchedulerDDG *DDG = SSD->getDDG(); while (!Worklist.empty()) { auto SU = Worklist.pop_back_val(); if (DoNotPipeline.count(SU)) continue; LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n"); DoNotPipeline.insert(SU); - for (auto &Dep : SU->Preds) - Worklist.push_back(Dep.getSUnit()); - if (SU->getInstr()->isPHI()) - for (auto &Dep : SU->Succs) - if (Dep.getKind() == SDep::Anti) - Worklist.push_back(Dep.getSUnit()); + for (const auto &IE : DDG->getInEdges(SU)) + Worklist.push_back(IE.getSrc()); + + // To preserve previous behavior and prevent regression + // FIXME: Remove if this doesn't have significant impact on + for (const auto &OE : DDG->getOutEdges(SU)) + if (OE.getDistance() == 1) + Worklist.push_back(OE.getDst()); } return DoNotPipeline; } @@ -3170,8 +3160,15 @@ bool SMSchedule::normalizeNonPipelinedInstructions( // Put the non-pipelined instruction as early as possible in the schedule int NewCycle = getFirstCycle(); - for (auto &Dep : SU.Preds) - NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle); + for (const auto &IE : SSD->getDDG()->getInEdges(&SU)) + if (IE.getDistance() == 0) + NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle); + + // To preserve previous behavior and prevent regression + // FIXME: Remove if this doesn't have significant impact on performance + for (auto &OE : SSD->getDDG()->getOutEdges(&SU)) + if (OE.getDistance() == 1) + NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle); int OldCycle = InstrToCycle[&SU]; if (OldCycle != NewCycle) { @@ -3204,14 +3201,16 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { int StageDef = stageScheduled(&SU); int CycleDef = InstrToCycle[&SU]; assert(StageDef != -1 && "Instruction should have been scheduled."); - for (auto &SI : SU.Succs) - if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode()) - if (Register::isPhysicalRegister(SI.getReg())) { - if (stageScheduled(SI.getSUnit()) != StageDef) + for (auto &OE : SSD->getDDG()->getOutEdges(&SU)) { + SUnit *Dst = OE.getDst(); + if (OE.isAssignedRegDep() && !Dst->isBoundaryNode()) + if (Register::isPhysicalRegister(OE.getReg())) { + if (stageScheduled(Dst) != StageDef) return false; - if (InstrToCycle[SI.getSUnit()] <= CycleDef) + if (InstrToCycle[Dst] <= CycleDef) return false; } + } } return true; } @@ -3223,7 +3222,7 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { /// The method below checks whether the property is met. /// If not, debug information is printed and statistics information updated. /// Note that we do not use an assert statement. -/// The reason is that although an invalid node oder may prevent +/// The reason is that although an invalid node order may prevent /// the pipeliner from finding a pipelined schedule for arbitrary II, /// it does not lead to the generation of incorrect code. void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { @@ -3261,8 +3260,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { (void)Succ; (void)Pred; - for (SDep &PredEdge : SU->Preds) { - SUnit *PredSU = PredEdge.getSUnit(); + for (const auto &IE : DDG->getInEdges(SU)) { + SUnit *PredSU = IE.getSrc(); unsigned PredIndex = std::get<1>( *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey)); if (!PredSU->getInstr()->isPHI() && PredIndex < Index) { @@ -3272,8 +3271,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { } } - for (SDep &SuccEdge : SU->Succs) { - SUnit *SuccSU = SuccEdge.getSUnit(); + for (const auto &OE : DDG->getOutEdges(SU)) { + SUnit *SuccSU = OE.getDst(); // Do not process a boundary node, it was not included in NodeOrder, // hence not in Indices either, call to std::lower_bound() below will // return Indices.end(). @@ -3750,3 +3749,72 @@ void ResourceManager::init(int II) { NumScheduledMops.clear(); NumScheduledMops.resize(II); } + +bool SwingSchedulerDDGEdge::ignoreDependence(bool IgnoreAnti) const { + if (Pred.isArtificial() || Dst->isBoundaryNode()) + return true; + // Currently, dependence that is an anti-dependences but not a loop-carried is + // also ignored. This behavior is preserved to prevent regression. + // FIXME: Remove if this doesn't have significant impact on performance + return IgnoreAnti && (Pred.getKind() == SDep::Kind::Anti || Distance != 0); +} + +SwingSchedulerDDG::SwingSchedulerDDGEdges & +SwingSchedulerDDG::getEdges(const SUnit *SU) { + if (SU == EntrySU) + return EntrySUEdges; + if (SU == ExitSU) + return ExitSUEdges; + return EdgesVec[SU->NodeNum]; +} + +const SwingSchedulerDDG::SwingSchedulerDDGEdges & +SwingSchedulerDDG::getEdges(const SUnit *SU) const { + if (SU == EntrySU) + return EntrySUEdges; + if (SU == ExitSU) + return ExitSUEdges; + return EdgesVec[SU->NodeNum]; +} + +void SwingSchedulerDDG::addEdge(const SUnit *SU, + const SwingSchedulerDDGEdge &Edge) { + auto &Edges = getEdges(SU); + if (Edge.getSrc() == SU) + Edges.Succs.push_back(Edge); + else + Edges.Preds.push_back(Edge); +} + +void SwingSchedulerDDG::initEdges(SUnit *SU) { + for (const auto &PI : SU->Preds) { + SwingSchedulerDDGEdge Edge(SU, PI, false); + addEdge(SU, Edge); + } + + for (const auto &SI : SU->Succs) { + SwingSchedulerDDGEdge Edge(SU, SI, true); + addEdge(SU, Edge); + } +} + +SwingSchedulerDDG::SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, + SUnit *ExitSU) + : EntrySU(EntrySU), ExitSU(ExitSU) { + EdgesVec.resize(SUnits.size()); + + initEdges(EntrySU); + initEdges(ExitSU); + for (auto &SU : SUnits) + initEdges(&SU); +} + +const SwingSchedulerDDG::EdgesType & +SwingSchedulerDDG::getInEdges(const SUnit *SU) const { + return getEdges(SU).Preds; +} + +const SwingSchedulerDDG::EdgesType & +SwingSchedulerDDG::getOutEdges(const SUnit *SU) const { + return getEdges(SU).Succs; +} |