author     Jay Foad <jay.foad@amd.com>  2020-01-06 11:22:51 +0000
committer  Jay Foad <jay.foad@amd.com>  2020-01-22 14:28:24 +0000
commit     e0f0d0e55cc7d389ad0692fbc9678e7895978355 (patch)
tree       dbf214c788e8e0a01cc3dbca418d8336fee60eff /llvm/lib/CodeGen/MachineScheduler.cpp
parent     70096ca111ee2848fb2e29a7cb3e4fb7e3ba9ef9 (diff)
[MachineScheduler] Allow clustering mem ops with complex addresses
The generic BaseMemOpClusterMutation calls into TargetInstrInfo to
analyze the address of each load/store instruction, and again to decide
whether two instructions should be clustered. Previously this had to
represent each address as a single base operand plus a constant byte
offset. This patch extends it to support any number of base operands.

The old target hook getMemOperandWithOffset is now a convenience
function for callers that are only prepared to handle a single base
operand. It calls the new, more general target hook
getMemOperandsWithOffset.

The only requirements for the base operands returned by
getMemOperandsWithOffset are:
- they can be sorted by MemOpInfo::Compare, such that clusterable ops
  get sorted next to each other, and
- shouldClusterMemOps knows what they mean.

One simple follow-on is to enable clustering of AMDGPU FLAT
instructions with both vaddr and saddr (base register + offset
register). I've left a FIXME in the code for this case.

Differential Revision: https://reviews.llvm.org/D71655
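For context, the convenience-wrapper relationship described above could
look roughly like the following. This is a minimal sketch, assuming
signatures consistent with the call sites in the diff below; the
parameter names and SmallVector size are illustrative, not taken
verbatim from the patch.

    bool TargetInstrInfo::getMemOperandWithOffset(
        const MachineInstr &MI, const MachineOperand *&BaseOp,
        int64_t &Offset, const TargetRegisterInfo *TRI) const {
      // Delegate to the new, more general hook, then accept only the
      // single-base-operand case that the old interface can express.
      SmallVector<const MachineOperand *, 4> BaseOps;
      if (!getMemOperandsWithOffset(MI, BaseOps, Offset, TRI) ||
          BaseOps.size() != 1)
        return false;
      BaseOp = BaseOps.front();
      return true;
    }

Callers that can handle multiple base operands, such as
BaseMemOpClusterMutation below, call getMemOperandsWithOffset directly.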
Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r--  llvm/lib/CodeGen/MachineScheduler.cpp | 69
1 file changed, 39 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 52ab018..7de1a5f 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1471,41 +1471,46 @@ namespace {
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
struct MemOpInfo {
SUnit *SU;
- const MachineOperand *BaseOp;
+ SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
- MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
- : SU(su), BaseOp(Op), Offset(ofs) {}
-
- bool operator<(const MemOpInfo &RHS) const {
- if (BaseOp->getType() != RHS.BaseOp->getType())
- return BaseOp->getType() < RHS.BaseOp->getType();
-
- if (BaseOp->isReg())
- return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
- std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
- RHS.SU->NodeNum);
- if (BaseOp->isFI()) {
- const MachineFunction &MF =
- *BaseOp->getParent()->getParent()->getParent();
+ MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
+ int64_t Offset)
+ : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset) {}
+
+ static bool Compare(const MachineOperand *const &A,
+ const MachineOperand *const &B) {
+ if (A->getType() != B->getType())
+ return A->getType() < B->getType();
+ if (A->isReg())
+ return A->getReg() < B->getReg();
+ if (A->isFI()) {
+ const MachineFunction &MF = *A->getParent()->getParent()->getParent();
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
bool StackGrowsDown = TFI.getStackGrowthDirection() ==
TargetFrameLowering::StackGrowsDown;
- // Can't use tuple comparison here since we might need to use a
- // different order when the stack grows down.
- if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
- return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
- : BaseOp->getIndex() < RHS.BaseOp->getIndex();
-
- if (Offset != RHS.Offset)
- return Offset < RHS.Offset;
-
- return SU->NodeNum < RHS.SU->NodeNum;
+ return StackGrowsDown ? A->getIndex() > B->getIndex()
+ : A->getIndex() < B->getIndex();
}
llvm_unreachable("MemOpClusterMutation only supports register or frame "
"index bases.");
}
+
+ bool operator<(const MemOpInfo &RHS) const {
+ // FIXME: Don't compare everything twice. Maybe use C++20 three way
+ // comparison instead when it's available.
+ if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
+ RHS.BaseOps.begin(), RHS.BaseOps.end(),
+ Compare))
+ return true;
+ if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
+ BaseOps.begin(), BaseOps.end(), Compare))
+ return false;
+ if (Offset != RHS.Offset)
+ return Offset < RHS.Offset;
+ return SU->NodeNum < RHS.SU->NodeNum;
+ }
};
const TargetInstrInfo *TII;
@@ -1560,10 +1565,14 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
- const MachineOperand *BaseOp;
+ SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
- if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
- MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
+ if (TII->getMemOperandsWithOffset(*SU->getInstr(), BaseOps, Offset, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset));
+#ifndef NDEBUG
+ for (auto *Op : BaseOps)
+ assert(Op);
+#endif
}
if (MemOpRecords.size() < 2)
return;
@@ -1573,8 +1582,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
SUnit *SUa = MemOpRecords[Idx].SU;
SUnit *SUb = MemOpRecords[Idx+1].SU;
- if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
- *MemOpRecords[Idx + 1].BaseOp,
+ if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
+ MemOpRecords[Idx + 1].BaseOps,
ClusterLength)) {
if (SUa->NodeNum > SUb->NodeNum)
std::swap(SUa, SUb);
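A note on the comparison idiom in MemOpInfo::operator< above: it runs
std::lexicographical_compare in both directions because a less-than
predicate alone cannot report equivalence, and the Offset and NodeNum
tie-breakers must apply only when neither operand range precedes the
other (hence the FIXME about C++20 three-way comparison). Below is a
standalone sketch of that idiom, not code from the patch; the Record
type and its fields are stand-ins for MemOpInfo, BaseOps, and the
tie-breakers.

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Record {
      std::vector<int> Keys; // stands in for BaseOps
      int Tie;               // stands in for Offset/NodeNum
      bool operator<(const Record &RHS) const {
        auto Less = [](int A, int B) { return A < B; };
        if (std::lexicographical_compare(Keys.begin(), Keys.end(),
                                         RHS.Keys.begin(), RHS.Keys.end(),
                                         Less))
          return true;  // Keys precede RHS.Keys
        if (std::lexicographical_compare(RHS.Keys.begin(), RHS.Keys.end(),
                                         Keys.begin(), Keys.end(), Less))
          return false; // RHS.Keys precede Keys
        return Tie < RHS.Tie; // keys equivalent: fall to the tie-breaker
      }
    };

    int main() {
      Record A{{1, 2}, 5}, B{{1, 2}, 3};
      assert(B < A && !(A < B)); // equal keys decided by the tie-breaker
    }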