diff options
author | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2021-01-08 09:32:52 +0000 |
---|---|---|
committer | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2021-01-27 10:49:56 +0000 |
commit | 48ecba350ed6477ae194720b263eff194bf0271c (patch) | |
tree | 8154b5974148bb4712ba951e32475c02f2acd056 /llvm/lib | |
parent | 0175cd00a1af35aa90e49bf008a0d4d4cbc7fb89 (diff) | |
download | llvm-48ecba350ed6477ae194720b263eff194bf0271c.zip llvm-48ecba350ed6477ae194720b263eff194bf0271c.tar.gz llvm-48ecba350ed6477ae194720b263eff194bf0271c.tar.bz2 |
[MachineLICM][MachineSink] Move SinkIntoLoop to MachineSink.
This moves SinkIntoLoop from MachineLICM to MachineSink. The motivation for
this work is that hoisting is a canonicalisation transformation, but we do not
really have a good story to sink instructions back if that is better, e.g. to
reduce live-ranges, register pressure and spilling. This has been discussed a
few times on the list, the latest thread is:
https://lists.llvm.org/pipermail/llvm-dev/2020-December/147184.html
There it was pointed out that we have the LoopSink IR pass, but that works on
IR, lacks register pressure informatiom, and is focused on profile guided
optimisations, and then we have MachineLICM and MachineSink that both perform
sinking. MachineLICM is more about hoisting and CSE'ing of hoisted
instructions. It also contained a very incomplete and disabled-by-default
SinkIntoLoop feature, which we now move to MachineSink.
Getting loop-sinking to do something useful is going to be at least a 3-step
approach:
1) This is just moving the code and is almost a NFC, but contains a bug fix.
This uses helper function `isLoopInvariant` that was factored out in D94082 and
added to MachineLoop.
2) A first functional change to make loop-sink a little bit less restrictive,
which it really is at the moment, is the change in D94308. This lets it do
more (alias) analysis using functions in MachineSink, making it a bit more
powerful. Nothing changes much: still off by default. But it shows that
MachineSink is a better home for this, and it starts using its functionality
like `hasStoreBetween`, and in the next step we can use `isProfitableToSinkTo`.
3) This is the going to be he interesting step: decision making when and how
many instructions to sink. This will be driven by the register pressure, and
deciding if reducing live-ranges and loop sinking will help in better
performance.
4) Once we are happy with 3), this should be enabled by default, that should be
the end goal of this exercise.
Differential Revision: https://reviews.llvm.org/D93694
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/MachineLICM.cpp | 92 | ||||
-rw-r--r-- | llvm/lib/CodeGen/MachineSink.cpp | 156 |
2 files changed, 156 insertions, 92 deletions
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index c06bc39..5a0f0bf 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -69,11 +69,6 @@ HoistCheapInsts("hoist-cheap-insts", cl::init(false), cl::Hidden); static cl::opt<bool> -SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", - cl::desc("MachineLICM should sink instructions into " - "loops to avoid register spills"), - cl::init(false), cl::Hidden); -static cl::opt<bool> HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden); @@ -246,8 +241,6 @@ namespace { void HoistOutOfLoop(MachineDomTreeNode *HeaderN); - void SinkIntoLoop(); - void InitRegPressure(MachineBasicBlock *BB); DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, @@ -395,9 +388,6 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { FirstInLoop = true; HoistOutOfLoop(N); CSEMap.clear(); - - if (SinkInstsToAvoidSpills) - SinkIntoLoop(); } } @@ -787,88 +777,6 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { } } -/// Sink instructions into loops if profitable. This especially tries to prevent -/// register spills caused by register pressure if there is little to no -/// overhead moving instructions into loops. -void MachineLICMBase::SinkIntoLoop() { - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) - return; - - SmallVector<MachineInstr *, 8> Candidates; - for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); - I != Preheader->instr_end(); ++I) { - // We need to ensure that we can safely move this instruction into the loop. - // As such, it must not have side-effects, e.g. such as a call has. - LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I); - if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) { - LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n"); - Candidates.push_back(&*I); - continue; - } - LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n"); - } - - for (MachineInstr *I : Candidates) { - const MachineOperand &MO = I->getOperand(0); - if (!MO.isDef() || !MO.isReg() || !MO.getReg()) - continue; - if (!MRI->hasOneDef(MO.getReg())) - continue; - bool CanSink = true; - MachineBasicBlock *SinkBlock = nullptr; - LLVM_DEBUG(dbgs() << "LICM: Try sinking: " << *I); - - for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "LICM: Analysing use: "; MI.dump()); - // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop - // iteration) is more expensive than a register. - // For now assumes that copies are cheap and thus almost always worth it. - if (!MI.isCopy()) { - CanSink = false; - break; - } - if (!SinkBlock) { - SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "LICM: Setting sink block to: " - << printMBBReference(*SinkBlock) << "\n"); - continue; - } - SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LICM: Can't find nearest dominator\n"); - CanSink = false; - break; - } - LLVM_DEBUG(dbgs() << "LICM: Setting nearest common dom block: " << - printMBBReference(*SinkBlock) << "\n"); - } - if (!CanSink) { - LLVM_DEBUG(dbgs() << "LICM: Can't sink instruction.\n"); - continue; - } - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LICM: Not sinking, can't find sink block.\n"); - continue; - } - if (SinkBlock == Preheader) { - LLVM_DEBUG(dbgs() << "LICM: Not sinking, sink block is the preheader\n"); - continue; - } - - LLVM_DEBUG(dbgs() << "LICM: Sinking to " << printMBBReference(*SinkBlock) - << " from " << printMBBReference(*I->getParent()) - << ": " << *I); - SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); - - // The instruction is moved from its basic block, so do not retain the - // debug information. - assert(!I->isDebugInstr() && "Should not sink debug inst"); - I->setDebugLoc(DebugLoc()); - } -} - static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 378df1b..832147e 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -91,7 +91,14 @@ static cl::opt<unsigned> SinkLoadBlocksThreshold( "the straight line is higher than this threshold."), cl::init(20), cl::Hidden); +static cl::opt<bool> +SinkInstsIntoLoop("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into loops to avoid " + "register spills"), + cl::init(false), cl::Hidden); + STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -216,6 +223,11 @@ namespace { bool &LocalUse) const; MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); + + void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, + SmallVectorImpl<MachineInstr *> &Candidates); + bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *SuccToSinkTo, @@ -340,6 +352,60 @@ bool MachineSinking::AllUsesDominatedByBlock(Register Reg, return true; } +/// Return true if this machine instruction loads from global offset table or +/// constant pool. +static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { + assert(MI.mayLoad() && "Expected MI that loads!"); + + // If we lost memory operands, conservatively assume that the instruction + // reads from everything.. + if (MI.memoperands_empty()) + return true; + + for (MachineMemOperand *MemOp : MI.memoperands()) + if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) + if (PSV->isGOT() || PSV->isConstantPool()) + return true; + + return false; +} + +void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, + SmallVectorImpl<MachineInstr *> &Candidates) { + for (auto &MI : *BB) { + LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); + if (!TII->shouldSink(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " + "target\n"); + continue; + } + if (!L->isLoopInvariant(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); + continue; + } + bool DontMoveAcrossStore = true; + if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { + LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); + continue; + } + if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); + continue; + } + if (MI.isConvergent()) + continue; + + const MachineOperand &MO = MI.getOperand(0); + if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + continue; + if (!MRI->hasOneDef(MO.getReg())) + continue; + + LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); + Candidates.push_back(&MI); + } +} + bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -389,6 +455,29 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { EverMadeChange = true; } + if (SinkInstsIntoLoop) { + SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end()); + for (auto *L : Loops) { + MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + if (!Preheader) { + LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); + continue; + } + SmallVector<MachineInstr *, 8> Candidates; + FindLoopSinkCandidates(L, Preheader, Candidates); + + // Walk the candidates in reverse order so that we start with the use + // of a def-use chain, if there is any. + for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) { + MachineInstr *I = *It; + if (!SinkIntoLoop(L, *I)) + break; + EverMadeChange = true; + ++NumLoopSunk; + } + } + } + HasStoreCache.clear(); StoreInstrCache.clear(); @@ -1098,6 +1187,73 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } +/// Sink instructions into loops if profitable. This especially tries to prevent +/// register spills caused by register pressure if there is little to no +/// overhead moving instructions into loops. +bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + assert(Preheader && "Loop sink needs a preheader block"); + MachineBasicBlock *SinkBlock = nullptr; + bool CanSink = true; + const MachineOperand &MO = I.getOperand(0); + + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); + if (!L->contains(&MI)) { + LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); + CanSink = false; + break; + } + + // FIXME: Come up with a proper cost model that estimates whether sinking + // the instruction (and thus possibly executing it on every loop + // iteration) is more expensive than a register. + // For now assumes that copies are cheap and thus almost always worth it. + if (!MI.isCopy()) { + LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); + CanSink = false; + break; + } + if (!SinkBlock) { + SinkBlock = MI.getParent(); + LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " + << printMBBReference(*SinkBlock) << "\n"); + continue; + } + SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); + CanSink = false; + break; + } + LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << + printMBBReference(*SinkBlock) << "\n"); + } + + if (!CanSink) { + LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); + return false; + } + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); + return false; + } + if (SinkBlock == Preheader) { + LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); + SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); + + // The instruction is moved from its basic block, so do not retain the + // debug information. + assert(!I.isDebugInstr() && "Should not sink debug inst"); + I.setDebugLoc(DebugLoc()); + return true; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, |