diff options
author | QingShan Zhang <qshanz@cn.ibm.com> | 2020-08-07 04:53:37 +0000 |
---|---|---|
committer | QingShan Zhang <qshanz@cn.ibm.com> | 2020-08-07 04:58:03 +0000 |
commit | 3359ea62edcc5f1d5831bebc2075746031cd22c5 (patch) | |
tree | 889cfd698b51381cb4686244af60a2a1283aa722 /llvm/lib/CodeGen/MachineScheduler.cpp | |
parent | 96b02808afa7eb043b9968b07424cc96bc8d94a6 (diff) | |
download | llvm-3359ea62edcc5f1d5831bebc2075746031cd22c5.zip llvm-3359ea62edcc5f1d5831bebc2075746031cd22c5.tar.gz llvm-3359ea62edcc5f1d5831bebc2075746031cd22c5.tar.bz2 |
[Scheduling] Create the missing dependency edges for store cluster
For a load cluster, we don't need to copy the dependency edge (SUb->reg) from SUb to SUa,
as they both already depend on the base register "reg".
             +-------+
        +---->  reg  |
        |    +---+---+
        |        ^
        |        |
        |        |
        |        |
        |    +---+---+
        |    |  SUa  |  Load 0(reg)
        |    +---+---+
        |        ^
        |        |
        |        |
        |    +---+---+
        +----+  SUb  |  Load 4(reg)
             +-------+
But for a store cluster, we need to create the missing edge, as shown below, to prevent the
instructions that the store SUb depends on from being scheduled in between SUb and SUa.
             +-------+
        +---->  reg  |
        |    +---+---+
        |        ^
        |        |         Missing       +-------+
        |        | +-------------------->+   y   |
        |        | |                     +---+---+
        |    +---+-+-+                       ^
        |    |  SUa  |  Store x 0(reg)       |
        |    +---+---+                       |
        |        ^                           |
        |        |  +------------------------+
        |        |  |
        |    +---+--++
        +----+  SUb  |  Store y 4(reg)
             +-------+
Reviewed By: evandro, arsenm, rampitec, foad, fhahn
Differential Revision: https://reviews.llvm.org/D72031
Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineScheduler.cpp | 36 |
1 files changed, 26 insertions, 10 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 7daaa35..fe4ceb2 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1624,16 +1624,32 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" << SUb->NodeNum << ")\n"); - // Copy successor edges from SUa to SUb. Interleaving computation - // dependent on SUa can prevent load combining due to register reuse. - // Predecessor edges do not need to be copied from SUb to SUa since - // nearby loads should have effectively the same inputs. - for (const SDep &Succ : SUa->Succs) { - if (Succ.getSUnit() == SUb) - continue; - LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum - << ")\n"); - DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); + if (IsLoad) { + // Copy successor edges from SUa to SUb. Interleaving computation + // dependent on SUa can prevent load combining due to register reuse. + // Predecessor edges do not need to be copied from SUb to SUa since + // nearby loads should have effectively the same inputs. + for (const SDep &Succ : SUa->Succs) { + if (Succ.getSUnit() == SUb) + continue; + LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum + << ")\n"); + DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); + } + } else { + // Copy predecessor edges from SUb to SUa to avoid the SUnits that + // SUb dependent on scheduled in-between SUb and SUa. Successor edges + // do not need to be copied from SUa to SUb since no one will depend + // on stores. + // Notice that, we don't need to care about the memory dependency as + // we won't try to cluster them if they have any memory dependency. 
+ for (const SDep &Pred : SUb->Preds) { + if (Pred.getSUnit() == SUa) + continue; + LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum + << ")\n"); + DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial)); + } } LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength |