aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachineSink.cpp
diff options
context:
space:
mode:
authorMomchil Velikov <momchil.velikov@arm.com>2023-09-25 09:36:04 +0100
committerMomchil Velikov <momchil.velikov@arm.com>2023-09-25 10:49:44 +0100
commitc649fd34e928ad01951cbff298c5c44853dd41dd (patch)
treeca396c453bd8ec3be812eee9f4bfef458b85ab27 /llvm/lib/CodeGen/MachineSink.cpp
parent0bfaed8c612705cfa8c5382d26d8089a0a26386b (diff)
downloadllvm-c649fd34e928ad01951cbff298c5c44853dd41dd.zip
llvm-c649fd34e928ad01951cbff298c5c44853dd41dd.tar.gz
llvm-c649fd34e928ad01951cbff298c5c44853dd41dd.tar.bz2
[MachineSink][AArch64] Sink instruction copies when they can replace copy into hard register or folded into addressing mode
This patch adds a new code transformation to the `MachineSink` pass, that tries to sink copies of an instruction, when the copies can be folded into the addressing modes of load/store instructions, or replace another instruction (currently, copies into a hard register). The criteria for performing the transformation is that: * the register pressure at the sink destination block must not exceed the register pressure limits * the latency and throughput of the load/store or the copy must not deteriorate * the original instruction must be deleted Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D152828
Diffstat (limited to 'llvm/lib/CodeGen/MachineSink.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineSink.cpp297
1 files changed, 268 insertions, 29 deletions
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index b4cbb93..480ac23 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -41,6 +41,7 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -115,6 +116,7 @@ STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
namespace {
class MachineSinking : public MachineFunctionPass {
+ const TargetSubtargetInfo *STI = nullptr;
const TargetInstrInfo *TII = nullptr;
const TargetRegisterInfo *TRI = nullptr;
MachineRegisterInfo *MRI = nullptr; // Machine register information
@@ -165,7 +167,10 @@ namespace {
StoreInstrCache;
/// Cached BB's register pressure.
- std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure;
+ std::map<const MachineBasicBlock *, std::vector<unsigned>>
+ CachedRegisterPressure;
+
+ bool EnableSinkAndFold;
public:
static char ID; // Pass identification
@@ -187,6 +192,7 @@ namespace {
AU.addPreserved<MachineLoopInfo>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<TargetPassConfig>();
}
void releaseMemory() override {
@@ -246,11 +252,17 @@ namespace {
bool PerformTrivialForwardCoalescing(MachineInstr &MI,
MachineBasicBlock *MBB);
+ bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB);
+
SmallVector<MachineBasicBlock *, 4> &
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const;
- std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB);
+ std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);
+
+ bool registerPressureSetExceedsLimit(unsigned NRegs,
+ const TargetRegisterClass *RC,
+ const MachineBasicBlock &MBB);
};
} // end anonymous namespace
@@ -338,6 +350,224 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
return true;
}
+bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
+ MachineBasicBlock *MBB) {
+ if (MI.isCopy() || MI.mayLoadOrStore() ||
+ MI.getOpcode() == TargetOpcode::REG_SEQUENCE)
+ return false;
+
+ // Don't sink instructions that the target prefers not to sink.
+ if (!TII->shouldSink(MI))
+ return false;
+
+ // Check if it's safe to move the instruction.
+ bool SawStore = true;
+ if (!MI.isSafeToMove(AA, SawStore))
+ return false;
+
+ // Convergent operations may not be made control-dependent on additional
+ // values.
+ if (MI.isConvergent())
+ return false;
+
+ // Don't sink defs/uses of hard registers or if the instruction defines more
+ // than one register.
+ // Don't sink more than two register uses - it'll cover most of the cases and
+ // greatly simplifies the register pressure checks.
+ Register DefReg;
+ Register UsedRegA, UsedRegB;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() ||
+ MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() ||
+ MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask())
+ continue;
+ if (!MO.isReg())
+ return false;
+
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (Reg.isVirtual()) {
+ if (MO.isDef()) {
+ if (DefReg)
+ return false;
+ DefReg = Reg;
+ continue;
+ }
+
+ if (UsedRegA == 0)
+ UsedRegA = Reg;
+ else if (UsedRegB == 0)
+ UsedRegB = Reg;
+ else
+ return false;
+ continue;
+ }
+
+ if (Reg.isPhysical() &&
+ (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
+ continue;
+
+ return false;
+ }
+
+ // Scan uses of the destination register. Every use, except the last, must be
+ // a copy, with a chain of copies terminating with either a copy into a hard
+ // register, or a load/store instruction where the use is part of the
+ // address (*not* the stored value).
+ using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>;
+ SmallVector<SinkInfo> SinkInto;
+ SmallVector<Register> Worklist;
+
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ const TargetRegisterClass *RCA =
+ UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA);
+ const TargetRegisterClass *RCB =
+ UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB);
+
+ Worklist.push_back(DefReg);
+ while (!Worklist.empty()) {
+ Register Reg = Worklist.pop_back_val();
+
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ ExtAddrMode MaybeAM;
+ MachineInstr &UseInst = *MO.getParent();
+ if (UseInst.isCopy()) {
+ Register DstReg;
+ if (const MachineOperand &O = UseInst.getOperand(0); O.isReg())
+ DstReg = O.getReg();
+ if (DstReg == 0)
+ return false;
+ if (DstReg.isVirtual()) {
+ Worklist.push_back(DstReg);
+ continue;
+ }
+ // If we are going to replace a copy, the original instruction must be
+ // as cheap as a copy.
+ if (!TII->isAsCheapAsAMove(MI))
+ return false;
+ // The hard register must be in the register class of the original
+ // instruction's destination register.
+ if (!RC->contains(DstReg))
+ return false;
+ } else if (UseInst.mayLoadOrStore()) {
+ ExtAddrMode AM;
+ if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM))
+ return false;
+ MaybeAM = AM;
+ } else {
+ return false;
+ }
+
+ if (UseInst.getParent() != MI.getParent()) {
+ // If the register class of the register we are replacingis a superset
+ // of any of the register classes of the operands of the materialized
+ // instruction don't consider that live range extended.
+ const TargetRegisterClass *RCS = MRI->getRegClass(Reg);
+ if (RCA && RCA->hasSuperClassEq(RCS))
+ RCA = nullptr;
+ else if (RCB && RCB->hasSuperClassEq(RCS))
+ RCB = nullptr;
+ if (RCA || RCB) {
+ if (RCA == nullptr) {
+ RCA = RCB;
+ RCB = nullptr;
+ }
+
+ unsigned NRegs = !!RCA + !!RCB;
+ if (RCA == RCB)
+ RCB = nullptr;
+
+ // Check we don't exceed register pressure at the destination.
+ const MachineBasicBlock &MBB = *UseInst.getParent();
+ if (RCB == nullptr) {
+ if (registerPressureSetExceedsLimit(NRegs, RCA, MBB))
+ return false;
+ } else if (registerPressureSetExceedsLimit(1, RCA, MBB) ||
+ registerPressureSetExceedsLimit(1, RCB, MBB)) {
+ return false;
+ }
+ }
+ }
+
+ SinkInto.emplace_back(&UseInst, MaybeAM);
+ }
+ }
+
+ if (SinkInto.empty())
+ return false;
+
+ // Now we know we can fold the instruction in all its users.
+ if (UsedRegA)
+ MRI->clearKillFlags(UsedRegA);
+ if (UsedRegB)
+ MRI->clearKillFlags(UsedRegB);
+
+ for (auto &[SinkDst, MaybeAM] : SinkInto) {
+ MachineInstr *New = nullptr;
+ LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
+ SinkDst->dump());
+ if (SinkDst->isCopy()) {
+ // Sink a copy of the instruction, replacing a COPY instruction.
+ MachineBasicBlock::iterator InsertPt = SinkDst->getIterator();
+ Register DstReg = SinkDst->getOperand(0).getReg();
+ TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI);
+ // If the original instruction did not have source location, reuse a one
+ // from the COPY.
+ New = &*std::prev(InsertPt);
+ if (const DebugLoc &NewLoc = New->getDebugLoc(); !NewLoc)
+ New->setDebugLoc(SinkDst->getDebugLoc());
+ // Sink DBG_VALUEs, which refer to the original instruction's destination
+ // (DefReg).
+ MachineBasicBlock &SinkMBB = *SinkDst->getParent();
+ auto &DbgUsers = SeenDbgUsers[DefReg];
+ for (auto &U : DbgUsers) {
+ MachineInstr *DbgMI = U.getPointer();
+ if (U.getInt())
+ continue;
+ MachineInstr *NewDbgMI = SinkDst->getMF()->CloneMachineInstr(DbgMI);
+ NewDbgMI->getOperand(0).setReg(DstReg);
+ SinkMBB.insertAfter(InsertPt, NewDbgMI);
+ }
+ } else {
+ // Fold instruction into the addressing mode of a memory instruction.
+ New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);
+ }
+ LLVM_DEBUG(dbgs() << "yielding"; New->dump());
+ SinkDst->eraseFromParent();
+ }
+
+ MI.eraseFromParent();
+
+ // Collect instructions that need to be deleted (COPYs). We cannot delete them
+ // while traversing register uses.
+ SmallVector<MachineInstr *> CleanupInstrs;
+ Worklist.push_back(DefReg);
+ while (!Worklist.empty()) {
+ Register Reg = Worklist.pop_back_val();
+
+ for (MachineOperand &MO : MRI->use_operands(Reg)) {
+ MachineInstr *U = MO.getParent();
+ assert((U->isCopy() || U->isDebugInstr()) &&
+ "Only debug uses and copies must remain");
+ if (U->isCopy()) {
+ Worklist.push_back(U->getOperand(0).getReg());
+ CleanupInstrs.push_back(U);
+ } else {
+ MO.setReg(0);
+ MO.setSubReg(0);
+ }
+ }
+ }
+
+ // Delete the dead COPYs.
+ for (MachineInstr *Del : CleanupInstrs)
+ Del->eraseFromParent();
+
+ return true;
+}
+
/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
@@ -461,8 +691,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");
- TII = MF.getSubtarget().getInstrInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
+ STI = &MF.getSubtarget();
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
PDT = &getAnalysis<MachinePostDominatorTree>();
@@ -471,6 +702,8 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
RegClassInfo.runOnMachineFunction(MF);
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ EnableSinkAndFold = PassConfig->getEnableSinkAndFold();
bool EverMadeChange = false;
@@ -547,8 +780,8 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
}
bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
- // Can't sink anything out of a block that has less than two successors.
- if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+ if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty())
+ return false;
// Don't bother sinking code out of unreachable blocks. In addition to being
// unprofitable, it can also lead to infinite looping, because in an
@@ -579,8 +812,16 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
continue;
}
- bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
- if (Joined) {
+ if (EnableSinkAndFold && PerformSinkAndFold(MI, &MBB)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Can't sink anything out of a block that has less than two successors.
+ if (MBB.succ_size() <= 1)
+ continue;
+
+ if (PerformTrivialForwardCoalescing(MI, &MBB)) {
MadeChange = true;
continue;
}
@@ -597,7 +838,6 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
SeenDbgVars.clear();
// recalculate the bb register pressure after sinking one BB.
CachedRegisterPressure.clear();
-
return MadeChange;
}
@@ -737,7 +977,7 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
}
std::vector<unsigned> &
-MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
+MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
// Currently to save compiling time, MBB's register pressure will not change
// in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's
// register pressure is changed after sinking any instructions into it.
@@ -753,10 +993,10 @@ MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
/*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
- for (MachineBasicBlock::iterator MII = MBB.instr_end(),
- MIE = MBB.instr_begin();
+ for (MachineBasicBlock::const_iterator MII = MBB.instr_end(),
+ MIE = MBB.instr_begin();
MII != MIE; --MII) {
- MachineInstr &MI = *std::prev(MII);
+ const MachineInstr &MI = *std::prev(MII);
if (MI.isDebugInstr() || MI.isPseudoProbe())
continue;
RegisterOperands RegOpers;
@@ -772,6 +1012,19 @@ MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
return It.first->second;
}
+bool MachineSinking::registerPressureSetExceedsLimit(
+ unsigned NRegs, const TargetRegisterClass *RC,
+ const MachineBasicBlock &MBB) {
+ unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight;
+ const int *PS = TRI->getRegClassPressureSets(RC);
+ std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB);
+ for (; *PS != -1; PS++)
+ if (Weight + BBRegisterPressure[*PS] >=
+ TRI->getRegPressureSetLimit(*MBB.getParent(), *PS))
+ return true;
+ return false;
+}
+
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
@@ -816,21 +1069,6 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
if (!MCycle)
return false;
- auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
- unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
- const int *PS = TRI->getRegClassPressureSets(RC);
- // Get register pressure for block SuccToSinkTo.
- std::vector<unsigned> BBRegisterPressure =
- getBBRegisterPressure(*SuccToSinkTo);
- for (; *PS != -1; PS++)
- // check if any register pressure set exceeds limit in block SuccToSinkTo
- // after sinking.
- if (Weight + BBRegisterPressure[*PS] >=
- TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
- return true;
- return false;
- };
-
// If this instruction is inside a Cycle and sinking this instruction can make
// more registers live range shorten, it is still prifitable.
for (const MachineOperand &MO : MI.operands()) {
@@ -870,7 +1108,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
// The DefMI is defined inside the cycle.
// If sinking this operand makes some register pressure set exceed limit,
// it is not profitable.
- if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
+ if (registerPressureSetExceedsLimit(1, MRI->getRegClass(Reg),
+ *SuccToSinkTo)) {
LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable.");
return false;
}