about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/CodeGen/MachineScheduler.cpp
diff options
context:
space:
mode:
author hsmahesha <mahesha.comp@gmail.com> 2020-05-26 15:47:03 +0530
committer hsmahesha <mahesha.comp@gmail.com> 2020-05-26 15:49:21 +0530
commit 09f7dcb64e1b2a3568ddb6ab327dd2f4a4d3d0fe (patch)
tree 56a5fe43d18a9813a7972f5fa17e5b8e6d88909f /llvm/lib/CodeGen/MachineScheduler.cpp
parent 6f802ec4333cc1227bb37e258a81e9a588f964dc (diff)
download llvm-09f7dcb64e1b2a3568ddb6ab327dd2f4a4d3d0fe.zip
llvm-09f7dcb64e1b2a3568ddb6ab327dd2f4a4d3d0fe.tar.gz
llvm-09f7dcb64e1b2a3568ddb6ab327dd2f4a4d3d0fe.tar.bz2
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary:
Clean-up code around mem ops clustering logic. This patch cleans up code within the function clusterNeighboringMemOps(). It is WIP, and this patch is a first cut.

Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar

Reviewed By: foad

Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80119
Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r-- llvm/lib/CodeGen/MachineScheduler.cpp 64
1 files changed, 39 insertions, 25 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 55b0075..92fd3ed 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1580,34 +1580,48 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
return;
llvm::sort(MemOpRecords);
+
+ // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to
+ // cluster mem ops collected within `MemOpRecords` array.
unsigned ClusterLength = 1;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
- SUnit *SUa = MemOpRecords[Idx].SU;
- SUnit *SUb = MemOpRecords[Idx+1].SU;
- if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
- MemOpRecords[Idx + 1].BaseOps,
- ClusterLength + 1)) {
- if (SUa->NodeNum > SUb->NodeNum)
- std::swap(SUa, SUb);
- if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
- LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
- << SUb->NodeNum << ")\n");
- // Copy successor edges from SUa to SUb. Interleaving computation
- // dependent on SUa can prevent load combining due to register reuse.
- // Predecessor edges do not need to be copied from SUb to SUa since
- // nearby loads should have effectively the same inputs.
- for (const SDep &Succ : SUa->Succs) {
- if (Succ.getSUnit() == SUb)
- continue;
- LLVM_DEBUG(dbgs()
- << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
- DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
- }
- ++ClusterLength;
- } else
- ClusterLength = 1;
- } else
+ // Decision to cluster mem ops is taken based on target dependent logic
+ auto MemOpa = MemOpRecords[Idx];
+ auto MemOpb = MemOpRecords[Idx + 1];
+ ++ClusterLength;
+ if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps,
+ ClusterLength)) {
+ // Current mem ops pair could not be clustered, reset cluster length, and
+ // go to next pair
+ ClusterLength = 1;
+ continue;
+ }
+
+ SUnit *SUa = MemOpa.SU;
+ SUnit *SUb = MemOpb.SU;
+ if (SUa->NodeNum > SUb->NodeNum)
+ std::swap(SUa, SUb);
+
+ // FIXME: Is this check really required?
+ if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
ClusterLength = 1;
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
+
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since
+ // nearby loads should have effectively the same inputs.
+ for (const SDep &Succ : SUa->Succs) {
+ if (Succ.getSUnit() == SUb)
+ continue;
+ LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
+ << ")\n");
+ DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+ }
}
}