Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineScheduler.cpp | 136
1 file changed, 72 insertions, 64 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index a8ccf26..b6d0d9a 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1530,7 +1530,10 @@ public:
   void apply(ScheduleDAGInstrs *DAGInstrs) override;
 
 protected:
-  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
+  void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps,
+                                ScheduleDAGInstrs *DAG);
+  void collectMemOpRecords(std::vector<SUnit> &SUnits,
+                           SmallVectorImpl<MemOpInfo> &MemOpRecords);
 };
 
 class StoreClusterMutation : public BaseMemOpClusterMutation {
@@ -1566,63 +1569,53 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII,
 } // end namespace llvm
 
+// Sorting all the loads/stores first, then for each load/store, checking the
+// following load/store one by one, until reach the first non-dependent one and
+// call target hook to see if they can cluster.
 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
-    ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
-  SmallVector<MemOpInfo, 32> MemOpRecords;
-  for (SUnit *SU : MemOps) {
-    const MachineInstr &MI = *SU->getInstr();
-    SmallVector<const MachineOperand *, 4> BaseOps;
-    int64_t Offset;
-    bool OffsetIsScalable;
-    unsigned Width;
-    if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
-                                           OffsetIsScalable, Width, TRI)) {
-      MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
-
-      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
-                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable
-                        << ", Width: " << Width << "\n");
-    }
-#ifndef NDEBUG
-    for (auto *Op : BaseOps)
-      assert(Op);
-#endif
-  }
-  if (MemOpRecords.size() < 2)
-    return;
-
-  llvm::sort(MemOpRecords);
+    ArrayRef<MemOpInfo> MemOpRecords, ScheduleDAGInstrs *DAG) {
+  // Keep track of the current cluster length and bytes for each SUnit.
+  DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
 
   // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to
   // cluster mem ops collected within `MemOpRecords` array.
-  unsigned ClusterLength = 1;
-  unsigned CurrentClusterBytes = MemOpRecords[0].Width;
   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
     // Decision to cluster mem ops is taken based on target dependent logic
     auto MemOpa = MemOpRecords[Idx];
-    auto MemOpb = MemOpRecords[Idx + 1];
-    ++ClusterLength;
-    CurrentClusterBytes += MemOpb.Width;
-    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
-                                  CurrentClusterBytes)) {
-      // Current mem ops pair could not be clustered, reset cluster length, and
-      // go to next pair
-      ClusterLength = 1;
-      CurrentClusterBytes = MemOpb.Width;
+
+    // Seek for the next load/store to do the cluster.
+    unsigned NextIdx = Idx + 1;
+    for (; NextIdx < End; ++NextIdx)
+      // Skip if MemOpb has been clustered already or has dependency with
+      // MemOpa.
+      if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
+          !DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
+          !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))
+        break;
+    if (NextIdx == End)
       continue;
+
+    auto MemOpb = MemOpRecords[NextIdx];
+    unsigned ClusterLength = 2;
+    unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
+    if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
+      ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
+      CurrentClusterBytes =
+          SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
     }
 
+    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
+                                  CurrentClusterBytes))
+      continue;
+
     SUnit *SUa = MemOpa.SU;
     SUnit *SUb = MemOpb.SU;
     if (SUa->NodeNum > SUb->NodeNum)
       std::swap(SUa, SUb);
 
     // FIXME: Is this check really required?
-    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
-      ClusterLength = 1;
-      CurrentClusterBytes = MemOpb.Width;
+    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
       continue;
-    }
 
     LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
                       << SUb->NodeNum << ")\n");
@@ -1656,42 +1649,57 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
       }
     }
 
+    SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
+                                             CurrentClusterBytes};
+
     LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength
                       << ", Curr cluster bytes: " << CurrentClusterBytes
                       << "\n");
   }
 }
 
-/// Callback from DAG postProcessing to create cluster edges for loads.
-void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
-  // Map DAG NodeNum to a set of dependent MemOps in store chain.
-  DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains;
-  for (SUnit &SU : DAG->SUnits) {
+void BaseMemOpClusterMutation::collectMemOpRecords(
+    std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
+  for (auto &SU : SUnits) {
     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
         (!IsLoad && !SU.getInstr()->mayStore()))
       continue;
 
-    unsigned ChainPredID = DAG->SUnits.size();
-    for (const SDep &Pred : SU.Preds) {
-      // We only want to cluster the mem ops that have the same ctrl(non-data)
-      // pred so that they didn't have ctrl dependency for each other. But for
-      // store instrs, we can still cluster them if the pred is load instr.
-      if ((Pred.isCtrl() &&
-           (IsLoad ||
-            (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
-          !Pred.isArtificial()) {
-        ChainPredID = Pred.getSUnit()->NodeNum;
-        break;
-      }
+    const MachineInstr &MI = *SU.getInstr();
+    SmallVector<const MachineOperand *, 4> BaseOps;
+    int64_t Offset;
+    bool OffsetIsScalable;
+    unsigned Width;
+    if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
+                                           OffsetIsScalable, Width, TRI)) {
+      MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
+
+      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
+                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable
+                        << ", Width: " << Width << "\n");
    }
-    // Insert the SU to corresponding store chain.
-    auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second;
-    Chain.push_back(&SU);
+#ifndef NDEBUG
+    for (auto *Op : BaseOps)
+      assert(Op);
+#endif
  }
+}
+
+/// Callback from DAG postProcessing to create cluster edges for loads/stores.
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
+  // Collect all the clusterable loads/stores
+  SmallVector<MemOpInfo, 32> MemOpRecords;
+  collectMemOpRecords(DAG->SUnits, MemOpRecords);
+
+  if (MemOpRecords.size() < 2)
+    return;
+
+  // Sorting the loads/stores, so that, we can stop the cluster as early as
+  // possible.
+  llvm::sort(MemOpRecords);
 
-  // Iterate over the store chains.
-  for (auto &SCD : StoreChains)
-    clusterNeighboringMemOps(SCD.second, DAG);
+  // Trying to cluster all the neighboring loads/stores.
+  clusterNeighboringMemOps(MemOpRecords, DAG);
 }
 
 //===----------------------------------------------------------------------===//
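For readers skimming the diff: the patch replaces the old per-"store chain" grouping with a single sorted pass. All clusterable loads/stores are collected and sorted; then, for each record, the pass scans forward to the first candidate that is not yet clustered and has no dependency in either direction, carrying the running cluster length and byte count per node in SUnit2ClusterInfo. The following is a minimal standalone sketch of that loop, assuming simplified stand-ins: MemOp, canReach() and shouldCluster() are invented for illustration only and are not the SUnit / ScheduleDAGInstrs::IsReachable() / TargetInstrInfo::shouldClusterMemOps() APIs the pass actually uses.

// Standalone sketch of the new clustering strategy (not LLVM code).
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

struct MemOp {
  unsigned NodeNum; // stands in for SUnit::NodeNum
  int Base;         // stands in for the base operand
  int64_t Offset;
  unsigned Width;   // access size in bytes
};

// Hypothetical dependency query; the real pass asks the schedule DAG.
static bool canReach(unsigned From, unsigned To) {
  (void)From;
  (void)To;
  return false;
}

// Hypothetical stand-in for the target hook: require the same base and cap
// clusters at 4 ops / 32 bytes.
static bool shouldCluster(int BaseA, int BaseB, unsigned Length, unsigned Bytes) {
  return BaseA == BaseB && Length <= 4 && Bytes <= 32;
}

int main() {
  std::vector<MemOp> Ops = {
      {0, 1, 0, 8}, {1, 1, 8, 8}, {2, 1, 16, 8}, {3, 2, 0, 4}};

  // Sort by (base, offset) so ops that may cluster become neighbours.
  std::sort(Ops.begin(), Ops.end(), [](const MemOp &A, const MemOp &B) {
    return std::tie(A.Base, A.Offset) < std::tie(B.Base, B.Offset);
  });

  // Running (cluster length, cluster bytes) per node, like SUnit2ClusterInfo.
  std::unordered_map<unsigned, std::pair<unsigned, unsigned>> ClusterInfo;

  for (unsigned Idx = 0, End = Ops.size(); Idx + 1 < End; ++Idx) {
    const MemOp &A = Ops[Idx];

    // Find the first later op that is not clustered yet and is independent
    // of A in both directions.
    unsigned NextIdx = Idx + 1;
    for (; NextIdx < End; ++NextIdx)
      if (!ClusterInfo.count(Ops[NextIdx].NodeNum) &&
          !canReach(Ops[NextIdx].NodeNum, A.NodeNum) &&
          !canReach(A.NodeNum, Ops[NextIdx].NodeNum))
        break;
    if (NextIdx == End)
      continue;

    const MemOp &B = Ops[NextIdx];
    // Extend A's existing cluster if it already belongs to one, else start a pair.
    unsigned Length = 2, Bytes = A.Width + B.Width;
    auto It = ClusterInfo.find(A.NodeNum);
    if (It != ClusterInfo.end()) {
      Length = It->second.first + 1;
      Bytes = It->second.second + B.Width;
    }
    if (!shouldCluster(A.Base, B.Base, Length, Bytes))
      continue;

    // The real pass adds a Cluster edge between the two SUnits here.
    ClusterInfo[B.NodeNum] = {Length, Bytes};
    std::printf("cluster node %u with node %u (len %u, bytes %u)\n",
                A.NodeNum, B.NodeNum, Length, Bytes);
  }
  return 0;
}

Keying the running totals by node, rather than resetting a single counter whenever a pair fails, is what lets a cluster keep growing past a record that could not join it.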