aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachineScheduler.cpp
diff options
context:
space:
mode:
authorQingShan Zhang <qshanz@cn.ibm.com>2020-11-02 02:06:14 +0000
committerQingShan Zhang <qshanz@cn.ibm.com>2020-11-02 02:11:52 +0000
commit1d178d600af77599b398930a640991c9c965a47c (patch)
treebb7d2bbbef9514890e9c4aec790fcf64b4929621 /llvm/lib/CodeGen/MachineScheduler.cpp
parent0949f96dc6521be80ebb8ebc1e1c506165c22aac (diff)
downloadllvm-1d178d600af77599b398930a640991c9c965a47c.zip
llvm-1d178d600af77599b398930a640991c9c965a47c.tar.gz
llvm-1d178d600af77599b398930a640991c9c965a47c.tar.bz2
[Scheduling] Fall back to the fast cluster algorithm if the DAG is too complex
We have added a new load/store cluster algorithm in D85517. However, AArch64 sees some compile-time degradation with the new algorithm, as IsReachable() is not cheap — O(M+N) — if the DAG is complex. See https://bugs.llvm.org/show_bug.cgi?id=47966 So, this patch adds a heuristic to fall back to the old cluster algorithm if the DAG is too complex. Reviewed By: Owen Anderson Differential Revision: https://reviews.llvm.org/D90144
Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineScheduler.cpp71
1 files changed, 62 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index b239131..256628a 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -129,6 +129,15 @@ static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
cl::desc("Enable memop clustering."),
cl::init(true));
+static cl::opt<bool>
+ ForceFastCluster("force-fast-cluster", cl::Hidden,
+ cl::desc("Switch to fast cluster algorithm with the lost "
+ "of some fusion opportunities"),
+ cl::init(false));
+static cl::opt<unsigned>
+ FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
+ cl::desc("The threshold for fast cluster"),
+ cl::init(1000));
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -1530,10 +1539,12 @@ public:
void apply(ScheduleDAGInstrs *DAGInstrs) override;
protected:
- void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps,
+ void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
ScheduleDAGInstrs *DAG);
void collectMemOpRecords(std::vector<SUnit> &SUnits,
SmallVectorImpl<MemOpInfo> &MemOpRecords);
+ bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
};
class StoreClusterMutation : public BaseMemOpClusterMutation {
@@ -1572,8 +1583,11 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII,
// Sorting all the loads/stores first, then for each load/store, checking the
// following load/store one by one, until reach the first non-dependent one and
// call target hook to see if they can cluster.
+// If FastCluster is enabled, we assume that, all the loads/stores have been
+// preprocessed and now, they didn't have dependencies on each other.
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
- ArrayRef<MemOpInfo> MemOpRecords, ScheduleDAGInstrs *DAG) {
+ ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
+ ScheduleDAGInstrs *DAG) {
// Keep track of the current cluster length and bytes for each SUnit.
DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
@@ -1589,8 +1603,9 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
// Skip if MemOpb has been clustered already or has dependency with
// MemOpa.
if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
- !DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
- !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))
+ (FastCluster ||
+ (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
+ !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
break;
if (NextIdx == End)
continue;
@@ -1685,6 +1700,36 @@ void BaseMemOpClusterMutation::collectMemOpRecords(
}
}
+bool BaseMemOpClusterMutation::groupMemOps(
+ ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
+ bool FastCluster =
+ ForceFastCluster ||
+ MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
+
+ for (const auto &MemOp : MemOps) {
+ unsigned ChainPredID = DAG->SUnits.size();
+ if (FastCluster) {
+ for (const SDep &Pred : MemOp.SU->Preds) {
+ // We only want to cluster the mem ops that have the same ctrl(non-data)
+ // pred so that they didn't have ctrl dependency for each other. But for
+ // store instrs, we can still cluster them if the pred is load instr.
+ if ((Pred.isCtrl() &&
+ (IsLoad ||
+ (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
+ !Pred.isArtificial()) {
+ ChainPredID = Pred.getSUnit()->NodeNum;
+ break;
+ }
+ }
+ } else
+ ChainPredID = 0;
+
+ Groups[ChainPredID].push_back(MemOp);
+ }
+ return FastCluster;
+}
+
/// Callback from DAG postProcessing to create cluster edges for loads/stores.
void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
// Collect all the clusterable loads/stores
@@ -1694,12 +1739,20 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
if (MemOpRecords.size() < 2)
return;
- // Sorting the loads/stores, so that, we can stop the cluster as early as
- // possible.
- llvm::sort(MemOpRecords);
+ // Put the loads/stores without dependency into the same group with some
+ // heuristic if the DAG is too complex to avoid compiling time blow up.
+ // Notice that, some fusion pair could be lost with this.
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
+ bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
- // Trying to cluster all the neighboring loads/stores.
- clusterNeighboringMemOps(MemOpRecords, DAG);
+ for (auto &Group : Groups) {
+ // Sorting the loads/stores, so that, we can stop the cluster as early as
+ // possible.
+ llvm::sort(Group.second);
+
+ // Trying to cluster all the neighboring loads/stores.
+ clusterNeighboringMemOps(Group.second, FastCluster, DAG);
+ }
}
//===----------------------------------------------------------------------===//