diff options
Diffstat (limited to 'llvm/lib/CodeGen/MachineSink.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineSink.cpp | 267 |
1 files changed, 176 insertions, 91 deletions
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 03d93ce..5cb5c0b 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -45,6 +45,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -113,6 +114,8 @@ STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); +using RegSubRegPair = TargetInstrInfo::RegSubRegPair; + namespace { class MachineSinking : public MachineFunctionPass { @@ -128,6 +131,7 @@ class MachineSinking : public MachineFunctionPass { const MachineBranchProbabilityInfo *MBPI = nullptr; AliasAnalysis *AA = nullptr; RegisterClassInfo RegClassInfo; + TargetSchedModel SchedModel; // Remember which edges have been considered for breaking. SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8> @@ -161,6 +165,8 @@ class MachineSinking : public MachineFunctionPass { /// would re-order assignments. using SeenDbgUser = PointerIntPair<MachineInstr *, 1>; + using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>; + /// Record of DBG_VALUE uses of vregs in a block, so that we can identify /// debug instructions to sink. SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers; @@ -255,7 +261,10 @@ private: void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl<MachineInstr *> &Candidates); - bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I); + + bool + aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I, + DenseMap<SinkItem, MachineInstr *> &SunkInstrs); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -271,11 +280,14 @@ private: GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const; - std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB); + std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB, + bool UseCache = true); bool registerPressureSetExceedsLimit(unsigned NRegs, const TargetRegisterClass *RC, const MachineBasicBlock &MBB); + + bool registerPressureExceedsLimit(const MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -680,6 +692,10 @@ void MachineSinking::FindCycleSinkCandidates( SmallVectorImpl<MachineInstr *> &Candidates) { for (auto &MI : *BB) { LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); + if (MI.isMetaInstruction()) { + LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n"); + continue; + } if (!TII->shouldSink(MI)) { LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " "target\n"); @@ -775,31 +791,62 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (SinkInstsIntoCycle) { SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles()); - for (auto *Cycle : Cycles) { - MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); - if (!Preheader) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); - continue; - } - SmallVector<MachineInstr *, 8> Candidates; - FindCycleSinkCandidates(Cycle, Preheader, Candidates); - - // Walk the candidates in reverse order so that we start with the use - // of a def-use chain, if there is any. - // TODO: Sort the candidates using a cost-model. - unsigned i = 0; - for (MachineInstr *I : llvm::reverse(Candidates)) { - if (i++ == SinkIntoCycleLimit) { - LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to " - "be analysed."); - break; + SchedModel.init(STI); + bool HasHighPressure; + + DenseMap<SinkItem, MachineInstr *> SunkInstrs; + + enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END }; + for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END; + ++Stage, SunkInstrs.clear()) { + HasHighPressure = false; + + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + if (!Preheader) { + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); + continue; } + SmallVector<MachineInstr *, 8> Candidates; + FindCycleSinkCandidates(Cycle, Preheader, Candidates); + + unsigned i = 0; + + // Walk the candidates in reverse order so that we start with the use + // of a def-use chain, if there is any. + // TODO: Sort the candidates using a cost-model. + for (MachineInstr *I : llvm::reverse(Candidates)) { + // CycleSinkStage::COPY: Sink a limited number of copies + if (Stage == CycleSinkStage::COPY) { + if (i++ == SinkIntoCycleLimit) { + LLVM_DEBUG(dbgs() + << "CycleSink: Limit reached of instructions to " + "be analyzed."); + break; + } + + if (!I->isCopy()) + continue; + } - if (!SinkIntoCycle(Cycle, *I)) - break; - EverMadeChange = true; - ++NumCycleSunk; + // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions + // which the target specifies as low-latency + if (Stage == CycleSinkStage::LOW_LATENCY && + !TII->hasLowDefLatency(SchedModel, *I, 0)) + continue; + + if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs)) + break; + EverMadeChange = true; + ++NumCycleSunk; + } + + // Recalculate the pressure after sinking + if (!HasHighPressure) + HasHighPressure = registerPressureExceedsLimit(*Preheader); } + if (!HasHighPressure) + break; } } @@ -1055,13 +1102,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, } std::vector<unsigned> & -MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) { +MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB, + bool UseCache) { // Currently to save compiling time, MBB's register pressure will not change // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's // register pressure is changed after sinking any instructions into it. // FIXME: need a accurate and cheap register pressure estiminate model here. + auto RP = CachedRegisterPressure.find(&MBB); - if (RP != CachedRegisterPressure.end()) + if (UseCache && RP != CachedRegisterPressure.end()) return RP->second; RegionPressure Pressure; @@ -1085,6 +1134,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) { } RPTracker.closeRegion(); + + if (RP != CachedRegisterPressure.end()) { + CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure; + return CachedRegisterPressure[&MBB]; + } + auto It = CachedRegisterPressure.insert( std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure)); return It.first->second; @@ -1103,6 +1158,21 @@ bool MachineSinking::registerPressureSetExceedsLimit( return false; } +// Recalculate RP and check if any pressure set exceeds the set limit. +bool MachineSinking::registerPressureExceedsLimit( + const MachineBasicBlock &MBB) { + std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, false); + + for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) { + if (BBRegisterPressure[PS] >= + TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) { + return true; + } + } + + return false; +} + /// isProfitableToSinkTo - Return true if it is profitable to sink MI. bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -1581,83 +1651,98 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } -/// Sink instructions into cycles if profitable. This especially tries to -/// prevent register spills caused by register pressure if there is little to no -/// overhead moving instructions into cycles. -bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { - LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); +/// Aggressively sink instructions into cycles. This will aggressively try to +/// sink all instructions in the top-most preheaders in an attempt to reduce RP. +/// In particular, it will sink into multiple successor blocks without limits +/// based on the amount of sinking, or the type of ops being sunk (so long as +/// they are safe to sink). +bool MachineSinking::aggressivelySinkIntoCycle( + MachineCycle *Cycle, MachineInstr &I, + DenseMap<SinkItem, MachineInstr *> &SunkInstrs) { + // TODO: support instructions with multiple defs + if (I.getNumDefs() > 1) + return false; + + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I); MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); assert(Preheader && "Cycle sink needs a preheader block"); - MachineBasicBlock *SinkBlock = nullptr; - bool CanSink = true; - const MachineOperand &MO = I.getOperand(0); - - for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); - if (!Cycle->contains(MI.getParent())) { - LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); - CanSink = false; - break; - } + SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses; - // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every cycle - // iteration) is more expensive than a register. - // For now assumes that copies are cheap and thus almost always worth it. - if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n"); - CanSink = false; - break; + MachineOperand &DefMO = I.getOperand(0); + for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) { + Uses.push_back({{DefMO.getReg(), DefMO.getSubReg()}, &MI}); + } + + for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) { + MachineInstr *MI = Entry.second; + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI); + if (MI->isPHI()) { + LLVM_DEBUG( + dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n"); + continue; } - if (!SinkBlock) { - SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " - << printMBBReference(*SinkBlock) << "\n"); + // We cannot sink before the prologue + if (MI->isPosition() || TII->isBasicBlockPrologue(*MI)) { + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, " + "can't sink.\n"); continue; } - SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); - CanSink = false; - break; + if (!Cycle->contains(MI->getParent())) { + LLVM_DEBUG( + dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n"); + continue; } - LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " - << printMBBReference(*SinkBlock) << "\n"); - } - if (!CanSink) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n"); - return false; - } - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n"); - return false; - } - if (SinkBlock == Preheader) { - LLVM_DEBUG( - dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); - return false; - } - if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) { - LLVM_DEBUG( - dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n"); - return false; - } + MachineBasicBlock *SinkBlock = MI->getParent(); + MachineInstr *NewMI = nullptr; + SinkItem MapEntry(&I, SinkBlock); + + auto SI = SunkInstrs.find(MapEntry); + + // Check for the case in which we have already sunk a copy of this + // instruction into the user block. + if (SI != SunkInstrs.end()) { + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: " + << printMBBReference(*SinkBlock) << "\n"); + NewMI = SI->second; + } - LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n"); - SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, - I); + // Create a copy of the instruction in the use block. + if (!NewMI) { + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: " + << printMBBReference(*SinkBlock) << "\n"); + + NewMI = I.getMF()->CloneMachineInstr(&I); + if (DefMO.getReg().isVirtual()) { + const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg()); + Register DestReg = MRI->createVirtualRegister(TRC); + NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(), + *TRI); + } + SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), + NewMI); + SunkInstrs.insert({MapEntry, NewMI}); + } - // Conservatively clear any kill flags on uses of sunk instruction - for (MachineOperand &MO : I.operands()) { - if (MO.isReg() && MO.readsReg()) + // Conservatively clear any kill flags on uses of sunk instruction + for (MachineOperand &MO : NewMI->all_uses()) { + assert(MO.isReg() && MO.isUse()); RegsToClearKillFlags.insert(MO.getReg()); - } + } - // The instruction is moved from its basic block, so do not retain the - // debug information. - assert(!I.isDebugInstr() && "Should not sink debug inst"); - I.setDebugLoc(DebugLoc()); + // The instruction is moved from its basic block, so do not retain the + // debug information. + assert(!NewMI->isDebugInstr() && "Should not sink debug inst"); + NewMI->setDebugLoc(DebugLoc()); + + // Replace the use with the newly created virtual register. + RegSubRegPair &UseReg = Entry.first; + MI->substituteRegister(UseReg.Reg, NewMI->getOperand(0).getReg(), + UseReg.SubReg, *TRI); + } + // If we have replaced all uses, then delete the dead instruction + if (I.isDead(*MRI)) + I.eraseFromParent(); return true; } |