author     Jay Foad <jay.foad@amd.com>   2020-01-06 11:22:51 +0000
committer  Jay Foad <jay.foad@amd.com>   2020-01-22 14:28:24 +0000
commit     e0f0d0e55cc7d389ad0692fbc9678e7895978355 (patch)
tree       dbf214c788e8e0a01cc3dbca418d8336fee60eff /llvm/lib/CodeGen/MachineScheduler.cpp
parent     70096ca111ee2848fb2e29a7cb3e4fb7e3ba9ef9 (diff)
[MachineScheduler] Allow clustering mem ops with complex addresses
The generic BaseMemOpClusterMutation calls into TargetInstrInfo to
analyze the address of each load/store instruction, and again to decide
whether two instructions should be clustered. Previously this had to
represent each address as a single base operand plus a constant byte
offset. This patch extends it to support any number of base operands.
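For reference, the shape of the new hook as added to TargetInstrInfo is roughly as below; this is a sketch, and the authoritative declaration and doc comment live in llvm/include/llvm/CodeGen/TargetInstrInfo.h in this change:

    // Member of TargetInstrInfo. Reports zero or more base operands and the
    // byte offset of an instruction that reads/writes memory; returns false
    // if the address cannot be analyzed.
    virtual bool getMemOperandsWithOffset(
        const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
        int64_t &Offset, const TargetRegisterInfo *TRI) const {
      return false;
    }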
The old target hook getMemOperandWithOffset is now a convenience
function for callers that are only prepared to handle a single base
operand. It calls the new, more general target hook
getMemOperandsWithOffset.
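A sketch of how that convenience wrapper can be written in terms of the new hook (the actual definition is outside the file shown in this diff, so treat the details as illustrative):

    bool TargetInstrInfo::getMemOperandWithOffset(
        const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
        const TargetRegisterInfo *TRI) const {
      SmallVector<const MachineOperand *, 4> BaseOps;
      // Only report a result when the general hook finds exactly one base
      // operand; otherwise this single-base interface cannot represent it.
      if (!getMemOperandsWithOffset(MI, BaseOps, Offset, TRI) ||
          BaseOps.size() != 1)
        return false;
      BaseOp = BaseOps.front();
      return true;
    }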
The only requirements for the base operands returned by
getMemOperandsWithOffset are:
- they can be sorted by MemOpInfo::Compare, such that clusterable ops
get sorted next to each other, and
- shouldClusterMemOps knows what they mean.
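To make the first requirement concrete, here is a minimal, self-contained illustration of the sort order the mutation relies on (hypothetical Record type and values, not LLVM code): records whose base-operand lists compare equal under the element comparator end up adjacent, tie-broken by offset.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Stand-in for MemOpInfo: a list of "base operands" (here just ints)
    // plus an offset. Clusterable records share the same Bases list.
    struct Record {
      std::vector<int> Bases;
      long Offset;
    };

    // Element comparator, playing the role of MemOpInfo::Compare.
    static bool CompareBase(int A, int B) { return A < B; }

    static bool Less(const Record &L, const Record &R) {
      // Same two-pass lexicographic pattern as the patch: compare the base
      // lists first, then fall back to Offset, so records with equal bases
      // sort next to each other in offset order.
      if (std::lexicographical_compare(L.Bases.begin(), L.Bases.end(),
                                       R.Bases.begin(), R.Bases.end(),
                                       CompareBase))
        return true;
      if (std::lexicographical_compare(R.Bases.begin(), R.Bases.end(),
                                       L.Bases.begin(), L.Bases.end(),
                                       CompareBase))
        return false;
      return L.Offset < R.Offset;
    }

    int main() {
      std::vector<Record> Recs = {{{1, 2}, 8}, {{3}, 0}, {{1, 2}, 0}, {{1, 2}, 4}};
      std::sort(Recs.begin(), Recs.end(), Less);
      // Prints the {1, 2}-based records contiguously (offsets 0, 4, 8),
      // followed by the {3}-based record.
      for (const Record &R : Recs)
        std::printf("bases=%zu offset=%ld\n", R.Bases.size(), R.Offset);
    }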
One simple follow-on is to enable clustering of AMDGPU FLAT instructions
with both vaddr and saddr (base register + offset register). I've left
a FIXME in the code for this case.
Differential Revision: https://reviews.llvm.org/D71655
Diffstat (limited to 'llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r--   llvm/lib/CodeGen/MachineScheduler.cpp | 69
1 file changed, 39 insertions, 30 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 52ab018..7de1a5f 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1471,41 +1471,46 @@ namespace {
 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
   struct MemOpInfo {
     SUnit *SU;
-    const MachineOperand *BaseOp;
+    SmallVector<const MachineOperand *, 4> BaseOps;
     int64_t Offset;

-    MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
-        : SU(su), BaseOp(Op), Offset(ofs) {}
-
-    bool operator<(const MemOpInfo &RHS) const {
-      if (BaseOp->getType() != RHS.BaseOp->getType())
-        return BaseOp->getType() < RHS.BaseOp->getType();
-
-      if (BaseOp->isReg())
-        return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
-               std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
-                               RHS.SU->NodeNum);
-      if (BaseOp->isFI()) {
-        const MachineFunction &MF =
-            *BaseOp->getParent()->getParent()->getParent();
+    MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
+              int64_t Offset)
+        : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset) {}
+
+    static bool Compare(const MachineOperand *const &A,
+                        const MachineOperand *const &B) {
+      if (A->getType() != B->getType())
+        return A->getType() < B->getType();
+      if (A->isReg())
+        return A->getReg() < B->getReg();
+      if (A->isFI()) {
+        const MachineFunction &MF = *A->getParent()->getParent()->getParent();
         const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
         bool StackGrowsDown = TFI.getStackGrowthDirection() ==
                               TargetFrameLowering::StackGrowsDown;
-        // Can't use tuple comparison here since we might need to use a
-        // different order when the stack grows down.
-        if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
-          return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
-                                : BaseOp->getIndex() < RHS.BaseOp->getIndex();
-
-        if (Offset != RHS.Offset)
-          return Offset < RHS.Offset;
-
-        return SU->NodeNum < RHS.SU->NodeNum;
+        return StackGrowsDown ? A->getIndex() > B->getIndex()
+                              : A->getIndex() < B->getIndex();
       }

       llvm_unreachable("MemOpClusterMutation only supports register or frame "
                        "index bases.");
     }
+
+    bool operator<(const MemOpInfo &RHS) const {
+      // FIXME: Don't compare everything twice. Maybe use C++20 three way
+      // comparison instead when it's available.
+      if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
+                                       RHS.BaseOps.begin(), RHS.BaseOps.end(),
+                                       Compare))
+        return true;
+      if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
+                                       BaseOps.begin(), BaseOps.end(), Compare))
+        return false;
+      if (Offset != RHS.Offset)
+        return Offset < RHS.Offset;
+      return SU->NodeNum < RHS.SU->NodeNum;
+    }
   };

   const TargetInstrInfo *TII;
@@ -1560,10 +1565,14 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
     ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
   SmallVector<MemOpInfo, 32> MemOpRecords;
   for (SUnit *SU : MemOps) {
-    const MachineOperand *BaseOp;
+    SmallVector<const MachineOperand *, 4> BaseOps;
     int64_t Offset;
-    if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
-      MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
+    if (TII->getMemOperandsWithOffset(*SU->getInstr(), BaseOps, Offset, TRI))
+      MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset));
+#ifndef NDEBUG
+    for (auto *Op : BaseOps)
+      assert(Op);
+#endif
   }
   if (MemOpRecords.size() < 2)
     return;
@@ -1573,8 +1582,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
     SUnit *SUa = MemOpRecords[Idx].SU;
     SUnit *SUb = MemOpRecords[Idx+1].SU;
-    if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
-                                 *MemOpRecords[Idx + 1].BaseOp,
+    if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
+                                 MemOpRecords[Idx + 1].BaseOps,
                                  ClusterLength)) {
       if (SUa->NodeNum > SUb->NodeNum)
         std::swap(SUa, SUb);
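
With BaseOps now passed through as an ArrayRef, a target's shouldClusterMemOps override sees every base operand instead of a single one. A sketch of what a single-base-only target override might look like under the post-patch signature; the target class, register check, and cluster limit here are hypothetical:

    // Hypothetical target override; assumes the post-patch hook signature
    // shouldClusterMemOps(ArrayRef<const MachineOperand *>,
    //                     ArrayRef<const MachineOperand *>, unsigned).
    bool MyTargetInstrInfo::shouldClusterMemOps(
        ArrayRef<const MachineOperand *> BaseOps1,
        ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads) const {
      // This target only understands addresses with a single base operand.
      if (BaseOps1.size() != 1 || BaseOps2.size() != 1)
        return false;
      const MachineOperand &Base1 = *BaseOps1.front();
      const MachineOperand &Base2 = *BaseOps2.front();
      // Hypothetical policy: cluster only same-register bases, at most 4 ops.
      return Base1.isReg() && Base2.isReg() &&
             Base1.getReg() == Base2.getReg() && NumLoads <= 4;
    }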