diff options
author | Momchil Velikov <momchil.velikov@arm.com> | 2023-09-25 09:36:04 +0100 |
---|---|---|
committer | Momchil Velikov <momchil.velikov@arm.com> | 2023-09-25 10:49:44 +0100 |
commit | c649fd34e928ad01951cbff298c5c44853dd41dd (patch) | |
tree | ca396c453bd8ec3be812eee9f4bfef458b85ab27 /llvm/lib/CodeGen/MachineSink.cpp | |
parent | 0bfaed8c612705cfa8c5382d26d8089a0a26386b (diff) | |
download | llvm-c649fd34e928ad01951cbff298c5c44853dd41dd.zip llvm-c649fd34e928ad01951cbff298c5c44853dd41dd.tar.gz llvm-c649fd34e928ad01951cbff298c5c44853dd41dd.tar.bz2 |
[MachineSink][AArch64] Sink instruction copies when they can replace copy into hard register or folded into addressing mode
This patch adds a new code transformation to the `MachineSink` pass,
that tries to sink copies of an instruction, when the copies can be folded
into the addressing modes of load/store instructions, or
replace another instruction (currently, copies into a hard register).
The criteria for performing the transformation is that:
* the register pressure at the sink destination block must not
exceed the register pressure limits
* the latency and throughput of the load/store or the copy must not deteriorate
* the original instruction must be deleted
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D152828
Diffstat (limited to 'llvm/lib/CodeGen/MachineSink.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineSink.cpp | 297 |
1 files changed, 268 insertions, 29 deletions
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index b4cbb93..480ac23 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -41,6 +41,7 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" @@ -115,6 +116,7 @@ STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); namespace { class MachineSinking : public MachineFunctionPass { + const TargetSubtargetInfo *STI = nullptr; const TargetInstrInfo *TII = nullptr; const TargetRegisterInfo *TRI = nullptr; MachineRegisterInfo *MRI = nullptr; // Machine register information @@ -165,7 +167,10 @@ namespace { StoreInstrCache; /// Cached BB's register pressure. - std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure; + std::map<const MachineBasicBlock *, std::vector<unsigned>> + CachedRegisterPressure; + + bool EnableSinkAndFold; public: static char ID; // Pass identification @@ -187,6 +192,7 @@ namespace { AU.addPreserved<MachineLoopInfo>(); if (UseBlockFreqInfo) AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<TargetPassConfig>(); } void releaseMemory() override { @@ -246,11 +252,17 @@ namespace { bool PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB); + bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB); + SmallVector<MachineBasicBlock *, 4> & GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const; - std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB); + std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB); + + bool registerPressureSetExceedsLimit(unsigned NRegs, + const TargetRegisterClass *RC, + const MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -338,6 +350,224 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, return true; } +bool MachineSinking::PerformSinkAndFold(MachineInstr &MI, + MachineBasicBlock *MBB) { + if (MI.isCopy() || MI.mayLoadOrStore() || + MI.getOpcode() == TargetOpcode::REG_SEQUENCE) + return false; + + // Don't sink instructions that the target prefers not to sink. + if (!TII->shouldSink(MI)) + return false; + + // Check if it's safe to move the instruction. + bool SawStore = true; + if (!MI.isSafeToMove(AA, SawStore)) + return false; + + // Convergent operations may not be made control-dependent on additional + // values. + if (MI.isConvergent()) + return false; + + // Don't sink defs/uses of hard registers or if the instruction defines more + // than one register. + // Don't sink more than two register uses - it'll cover most of the cases and + // greatly simplifies the register pressure checks. + Register DefReg; + Register UsedRegA, UsedRegB; + for (const MachineOperand &MO : MI.operands()) { + if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() || + MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() || + MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask()) + continue; + if (!MO.isReg()) + return false; + + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + if (Reg.isVirtual()) { + if (MO.isDef()) { + if (DefReg) + return false; + DefReg = Reg; + continue; + } + + if (UsedRegA == 0) + UsedRegA = Reg; + else if (UsedRegB == 0) + UsedRegB = Reg; + else + return false; + continue; + } + + if (Reg.isPhysical() && + (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO))) + continue; + + return false; + } + + // Scan uses of the destination register. Every use, except the last, must be + // a copy, with a chain of copies terminating with either a copy into a hard + // register, or a load/store instruction where the use is part of the + // address (*not* the stored value). + using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>; + SmallVector<SinkInfo> SinkInto; + SmallVector<Register> Worklist; + + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + const TargetRegisterClass *RCA = + UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA); + const TargetRegisterClass *RCB = + UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB); + + Worklist.push_back(DefReg); + while (!Worklist.empty()) { + Register Reg = Worklist.pop_back_val(); + + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + ExtAddrMode MaybeAM; + MachineInstr &UseInst = *MO.getParent(); + if (UseInst.isCopy()) { + Register DstReg; + if (const MachineOperand &O = UseInst.getOperand(0); O.isReg()) + DstReg = O.getReg(); + if (DstReg == 0) + return false; + if (DstReg.isVirtual()) { + Worklist.push_back(DstReg); + continue; + } + // If we are going to replace a copy, the original instruction must be + // as cheap as a copy. + if (!TII->isAsCheapAsAMove(MI)) + return false; + // The hard register must be in the register class of the original + // instruction's destination register. + if (!RC->contains(DstReg)) + return false; + } else if (UseInst.mayLoadOrStore()) { + ExtAddrMode AM; + if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM)) + return false; + MaybeAM = AM; + } else { + return false; + } + + if (UseInst.getParent() != MI.getParent()) { + // If the register class of the register we are replacingis a superset + // of any of the register classes of the operands of the materialized + // instruction don't consider that live range extended. + const TargetRegisterClass *RCS = MRI->getRegClass(Reg); + if (RCA && RCA->hasSuperClassEq(RCS)) + RCA = nullptr; + else if (RCB && RCB->hasSuperClassEq(RCS)) + RCB = nullptr; + if (RCA || RCB) { + if (RCA == nullptr) { + RCA = RCB; + RCB = nullptr; + } + + unsigned NRegs = !!RCA + !!RCB; + if (RCA == RCB) + RCB = nullptr; + + // Check we don't exceed register pressure at the destination. + const MachineBasicBlock &MBB = *UseInst.getParent(); + if (RCB == nullptr) { + if (registerPressureSetExceedsLimit(NRegs, RCA, MBB)) + return false; + } else if (registerPressureSetExceedsLimit(1, RCA, MBB) || + registerPressureSetExceedsLimit(1, RCB, MBB)) { + return false; + } + } + } + + SinkInto.emplace_back(&UseInst, MaybeAM); + } + } + + if (SinkInto.empty()) + return false; + + // Now we know we can fold the instruction in all its users. + if (UsedRegA) + MRI->clearKillFlags(UsedRegA); + if (UsedRegB) + MRI->clearKillFlags(UsedRegB); + + for (auto &[SinkDst, MaybeAM] : SinkInto) { + MachineInstr *New = nullptr; + LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into"; + SinkDst->dump()); + if (SinkDst->isCopy()) { + // Sink a copy of the instruction, replacing a COPY instruction. + MachineBasicBlock::iterator InsertPt = SinkDst->getIterator(); + Register DstReg = SinkDst->getOperand(0).getReg(); + TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI); + // If the original instruction did not have source location, reuse a one + // from the COPY. + New = &*std::prev(InsertPt); + if (const DebugLoc &NewLoc = New->getDebugLoc(); !NewLoc) + New->setDebugLoc(SinkDst->getDebugLoc()); + // Sink DBG_VALUEs, which refer to the original instruction's destination + // (DefReg). + MachineBasicBlock &SinkMBB = *SinkDst->getParent(); + auto &DbgUsers = SeenDbgUsers[DefReg]; + for (auto &U : DbgUsers) { + MachineInstr *DbgMI = U.getPointer(); + if (U.getInt()) + continue; + MachineInstr *NewDbgMI = SinkDst->getMF()->CloneMachineInstr(DbgMI); + NewDbgMI->getOperand(0).setReg(DstReg); + SinkMBB.insertAfter(InsertPt, NewDbgMI); + } + } else { + // Fold instruction into the addressing mode of a memory instruction. + New = TII->emitLdStWithAddr(*SinkDst, MaybeAM); + } + LLVM_DEBUG(dbgs() << "yielding"; New->dump()); + SinkDst->eraseFromParent(); + } + + MI.eraseFromParent(); + + // Collect instructions that need to be deleted (COPYs). We cannot delete them + // while traversing register uses. + SmallVector<MachineInstr *> CleanupInstrs; + Worklist.push_back(DefReg); + while (!Worklist.empty()) { + Register Reg = Worklist.pop_back_val(); + + for (MachineOperand &MO : MRI->use_operands(Reg)) { + MachineInstr *U = MO.getParent(); + assert((U->isCopy() || U->isDebugInstr()) && + "Only debug uses and copies must remain"); + if (U->isCopy()) { + Worklist.push_back(U->getOperand(0).getReg()); + CleanupInstrs.push_back(U); + } else { + MO.setReg(0); + MO.setSubReg(0); + } + } + } + + // Delete the dead COPYs. + for (MachineInstr *Del : CleanupInstrs) + Del->eraseFromParent(); + + return true; +} + /// AllUsesDominatedByBlock - Return true if all uses of the specified register /// occur in blocks dominated by the specified block. If any use is in the /// definition block, then return false since it is never legal to move def @@ -461,8 +691,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n"); - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); + STI = &MF.getSubtarget(); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); MRI = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); PDT = &getAnalysis<MachinePostDominatorTree>(); @@ -471,6 +702,8 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); RegClassInfo.runOnMachineFunction(MF); + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + EnableSinkAndFold = PassConfig->getEnableSinkAndFold(); bool EverMadeChange = false; @@ -547,8 +780,8 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { } bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { - // Can't sink anything out of a block that has less than two successors. - if (MBB.succ_size() <= 1 || MBB.empty()) return false; + if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty()) + return false; // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an @@ -579,8 +812,16 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { continue; } - bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); - if (Joined) { + if (EnableSinkAndFold && PerformSinkAndFold(MI, &MBB)) { + MadeChange = true; + continue; + } + + // Can't sink anything out of a block that has less than two successors. + if (MBB.succ_size() <= 1) + continue; + + if (PerformTrivialForwardCoalescing(MI, &MBB)) { MadeChange = true; continue; } @@ -597,7 +838,6 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { SeenDbgVars.clear(); // recalculate the bb register pressure after sinking one BB. CachedRegisterPressure.clear(); - return MadeChange; } @@ -737,7 +977,7 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, } std::vector<unsigned> & -MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) { +MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) { // Currently to save compiling time, MBB's register pressure will not change // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's // register pressure is changed after sinking any instructions into it. @@ -753,10 +993,10 @@ MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) { RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(), /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); - for (MachineBasicBlock::iterator MII = MBB.instr_end(), - MIE = MBB.instr_begin(); + for (MachineBasicBlock::const_iterator MII = MBB.instr_end(), + MIE = MBB.instr_begin(); MII != MIE; --MII) { - MachineInstr &MI = *std::prev(MII); + const MachineInstr &MI = *std::prev(MII); if (MI.isDebugInstr() || MI.isPseudoProbe()) continue; RegisterOperands RegOpers; @@ -772,6 +1012,19 @@ MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) { return It.first->second; } +bool MachineSinking::registerPressureSetExceedsLimit( + unsigned NRegs, const TargetRegisterClass *RC, + const MachineBasicBlock &MBB) { + unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight; + const int *PS = TRI->getRegClassPressureSets(RC); + std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB); + for (; *PS != -1; PS++) + if (Weight + BBRegisterPressure[*PS] >= + TRI->getRegPressureSetLimit(*MBB.getParent(), *PS)) + return true; + return false; +} + /// isProfitableToSinkTo - Return true if it is profitable to sink MI. bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -816,21 +1069,6 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, if (!MCycle) return false; - auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { - unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; - const int *PS = TRI->getRegClassPressureSets(RC); - // Get register pressure for block SuccToSinkTo. - std::vector<unsigned> BBRegisterPressure = - getBBRegisterPressure(*SuccToSinkTo); - for (; *PS != -1; PS++) - // check if any register pressure set exceeds limit in block SuccToSinkTo - // after sinking. - if (Weight + BBRegisterPressure[*PS] >= - TRI->getRegPressureSetLimit(*MBB->getParent(), *PS)) - return true; - return false; - }; - // If this instruction is inside a Cycle and sinking this instruction can make // more registers live range shorten, it is still prifitable. for (const MachineOperand &MO : MI.operands()) { @@ -870,7 +1108,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, // The DefMI is defined inside the cycle. // If sinking this operand makes some register pressure set exceed limit, // it is not profitable. - if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) { + if (registerPressureSetExceedsLimit(1, MRI->getRegClass(Reg), + *SuccToSinkTo)) { LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable."); return false; } |