diff options
author | QingShan Zhang <qshanz@cn.ibm.com> | 2020-11-02 02:06:14 +0000 |
---|---|---|
committer | QingShan Zhang <qshanz@cn.ibm.com> | 2020-11-02 02:11:52 +0000 |
commit | 1d178d600af77599b398930a640991c9c965a47c (patch) | |
tree | bb7d2bbbef9514890e9c4aec790fcf64b4929621 /llvm/lib/CodeGen/MachineScheduler.cpp | |
parent | 0949f96dc6521be80ebb8ebc1e1c506165c22aac (diff) | |
download | llvm-1d178d600af77599b398930a640991c9c965a47c.zip llvm-1d178d600af77599b398930a640991c9c965a47c.tar.gz llvm-1d178d600af77599b398930a640991c9c965a47c.tar.bz2 |
[Scheduling] Fall back to the fast cluster algorithm if the DAG is too complex
We have added a new load/store cluster algorithm in D85517. However, AArch64 see
some compiling deg with the new algorithm as the IsReachable() is not cheap if
the DAG is complex. O(M+N) See https://bugs.llvm.org/show_bug.cgi?id=47966
So, this patch added a heuristic to switch to old cluster algorithm if the DAG is too complex.
Reviewed By: Owen Anderson
Differential Revision: https://reviews.llvm.org/D90144
Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineScheduler.cpp | 71 |
1 files changed, 62 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index b239131..256628a 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -129,6 +129,15 @@ static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden, static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true)); +static cl::opt<bool> + ForceFastCluster("force-fast-cluster", cl::Hidden, + cl::desc("Switch to fast cluster algorithm with the lost " + "of some fusion opportunities"), + cl::init(false)); +static cl::opt<unsigned> + FastClusterThreshold("fast-cluster-threshold", cl::Hidden, + cl::desc("The threshold for fast cluster"), + cl::init(1000)); // DAG subtrees must have at least this many nodes. static const unsigned MinSubtreeSize = 8; @@ -1530,10 +1539,12 @@ public: void apply(ScheduleDAGInstrs *DAGInstrs) override; protected: - void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, + void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster, ScheduleDAGInstrs *DAG); void collectMemOpRecords(std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords); + bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG, + DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups); }; class StoreClusterMutation : public BaseMemOpClusterMutation { @@ -1572,8 +1583,11 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII, // Sorting all the loads/stores first, then for each load/store, checking the // following load/store one by one, until reach the first non-dependent one and // call target hook to see if they can cluster. +// If FastCluster is enabled, we assume that, all the loads/stores have been +// preprocessed and now, they didn't have dependencies on each other. void BaseMemOpClusterMutation::clusterNeighboringMemOps( - ArrayRef<MemOpInfo> MemOpRecords, ScheduleDAGInstrs *DAG) { + ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster, + ScheduleDAGInstrs *DAG) { // Keep track of the current cluster length and bytes for each SUnit. DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo; @@ -1589,8 +1603,9 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( // Skip if MemOpb has been clustered already or has dependency with // MemOpa. if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) && - !DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) && - !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU)) + (FastCluster || + (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) && + !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU)))) break; if (NextIdx == End) continue; @@ -1685,6 +1700,36 @@ void BaseMemOpClusterMutation::collectMemOpRecords( } } +bool BaseMemOpClusterMutation::groupMemOps( + ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG, + DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) { + bool FastCluster = + ForceFastCluster || + MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold; + + for (const auto &MemOp : MemOps) { + unsigned ChainPredID = DAG->SUnits.size(); + if (FastCluster) { + for (const SDep &Pred : MemOp.SU->Preds) { + // We only want to cluster the mem ops that have the same ctrl(non-data) + // pred so that they didn't have ctrl dependency for each other. But for + // store instrs, we can still cluster them if the pred is load instr. + if ((Pred.isCtrl() && + (IsLoad || + (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) && + !Pred.isArtificial()) { + ChainPredID = Pred.getSUnit()->NodeNum; + break; + } + } + } else + ChainPredID = 0; + + Groups[ChainPredID].push_back(MemOp); + } + return FastCluster; +} + /// Callback from DAG postProcessing to create cluster edges for loads/stores. void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { // Collect all the clusterable loads/stores @@ -1694,12 +1739,20 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { if (MemOpRecords.size() < 2) return; - // Sorting the loads/stores, so that, we can stop the cluster as early as - // possible. - llvm::sort(MemOpRecords); + // Put the loads/stores without dependency into the same group with some + // heuristic if the DAG is too complex to avoid compiling time blow up. + // Notice that, some fusion pair could be lost with this. + DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups; + bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups); - // Trying to cluster all the neighboring loads/stores. - clusterNeighboringMemOps(MemOpRecords, DAG); + for (auto &Group : Groups) { + // Sorting the loads/stores, so that, we can stop the cluster as early as + // possible. + llvm::sort(Group.second); + + // Trying to cluster all the neighboring loads/stores. + clusterNeighboringMemOps(Group.second, FastCluster, DAG); + } } //===----------------------------------------------------------------------===// |