aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaohaiWen <haohai.wen@intel.com>2023-12-22 23:06:16 +0800
committerGitHub <noreply@github.com>2023-12-22 23:06:16 +0800
commit40ec791b15b0110785a91b057e95535e8b0989b6 (patch)
tree0e0c27e550abc7f4251d0e2c96c62a189ad1e6d3
parent5cb7534a7d17e65bdfa676a4e56698e2a05b89b9 (diff)
downloadllvm-40ec791b15b0110785a91b057e95535e8b0989b6.zip
llvm-40ec791b15b0110785a91b057e95535e8b0989b6.tar.gz
llvm-40ec791b15b0110785a91b057e95535e8b0989b6.tar.bz2
[RegAllocFast] Refactor dominates algorithm for large basic block (#72250)
The original brute force dominates algorithm is O(n) complexity so it is very slow for very large machine basic block which is very common with O0. This patch added InstrPosIndexes to assign index for each instruction and use it to determine dominance. The complexity is now O(1).
-rw-r--r--llvm/lib/CodeGen/RegAllocFast.cpp128
1 files changed, 114 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 40c42cab..a52013a 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -62,6 +62,107 @@ static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator",
namespace {
+/// Assign ascending index for instructions in machine basic block. The index
+/// can be used to determine dominance between instructions in same MBB.
+class InstrPosIndexes {
+public:
+ void init(const MachineBasicBlock &MBB) {
+ CurMBB = &MBB;
+ Instr2PosIndex.clear();
+ uint64_t LastIndex = 0;
+ for (const MachineInstr &MI : MBB) {
+ LastIndex += InstrDist;
+ Instr2PosIndex[&MI] = LastIndex;
+ }
+ }
+
+ /// Set \p Index to index of \p MI. If \p MI is new inserted, it try to assign
+ /// index without affecting existing instruction's index. Return true if all
+ /// instructions index has been reassigned.
+ bool getIndex(const MachineInstr &MI, uint64_t &Index) {
+ assert(MI.getParent() == CurMBB && "MI is not in CurMBB");
+ if (Instr2PosIndex.count(&MI)) {
+ Index = Instr2PosIndex[&MI];
+ return false;
+ }
+
+ // Distance is the number of consecutive unassigned instructions including
+ // MI. Start is the first instruction of them. End is the next of last
+ // instruction of them.
+ // e.g.
+ // |Instruction| A | B | C | MI | D | E |
+ // | Index | 1024 | | | | | 2048 |
+ //
+ // In this case, B, C, MI, D are unassigned. Distance is 4, Start is B, End
+ // is E.
+ unsigned Distance = 1;
+ MachineBasicBlock::const_iterator Start = MI.getIterator(),
+ End = std::next(Start);
+ while (Start != CurMBB->begin() &&
+ !Instr2PosIndex.count(&*std::prev(Start))) {
+ --Start;
+ ++Distance;
+ }
+ while (End != CurMBB->end() && !Instr2PosIndex.count(&*(End))) {
+ ++End;
+ ++Distance;
+ }
+
+ // LastIndex is initialized to last used index prior to MI or zero.
+ // In previous example, LastIndex is 1024, EndIndex is 2048;
+ uint64_t LastIndex =
+ Start == CurMBB->begin() ? 0 : Instr2PosIndex.at(&*std::prev(Start));
+ uint64_t Step;
+ if (End == CurMBB->end())
+ Step = static_cast<uint64_t>(InstrDist);
+ else {
+ // No instruction uses index zero.
+ uint64_t EndIndex = Instr2PosIndex.at(&*End);
+ assert(EndIndex > LastIndex && "Index must be ascending order");
+ unsigned NumAvailableIndexes = EndIndex - LastIndex - 1;
+ // We want index gap between two adjacent MI is as same as possible. Given
+ // total A available indexes, D is number of consecutive unassigned
+ // instructions, S is the step.
+ // |<- S-1 -> MI <- S-1 -> MI <- A-S*D ->|
+ // There're S-1 available indexes between unassigned instruction and its
+ // predecessor. There're A-S*D available indexes between the last
+ // unassigned instruction and its successor.
+ // Ideally, we want
+ // S-1 = A-S*D
+ // then
+ // S = (A+1)/(D+1)
+ // An valid S must be integer greater than zero, so
+ // S <= (A+1)/(D+1)
+ // =>
+ // A-S*D >= 0
+ // That means we can safely use (A+1)/(D+1) as step.
+ // In previous example, Step is 204, Index of B, C, MI, D is 1228, 1432,
+ // 1636, 1840.
+ Step = (NumAvailableIndexes + 1) / (Distance + 1);
+ }
+
+ // Reassign index for all instructions if number of new inserted
+ // instructions exceed slot or all instructions are new.
+ if (LLVM_UNLIKELY(!Step || (!LastIndex && Step == InstrDist))) {
+ init(*CurMBB);
+ Index = Instr2PosIndex.at(&MI);
+ return true;
+ }
+
+ for (auto I = Start; I != End; ++I) {
+ LastIndex += Step;
+ Instr2PosIndex[&*I] = LastIndex;
+ }
+ Index = Instr2PosIndex.at(&MI);
+ return false;
+ }
+
+private:
+ enum { InstrDist = 1024 };
+ const MachineBasicBlock *CurMBB = nullptr;
+ DenseMap<const MachineInstr *, uint64_t> Instr2PosIndex;
+};
+
class RegAllocFast : public MachineFunctionPass {
public:
static char ID;
@@ -153,6 +254,9 @@ private:
// Register masks attached to the current instruction.
SmallVector<const uint32_t *> RegMasks;
+ // Assign index for each instruction to quickly determine dominance.
+ InstrPosIndexes PosIndexes;
+
void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
bool isPhysRegFree(MCPhysReg PhysReg) const;
@@ -339,18 +443,13 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) {
return FrameIdx;
}
-static bool dominates(MachineBasicBlock &MBB,
- MachineBasicBlock::const_iterator A,
- MachineBasicBlock::const_iterator B) {
- auto MBBEnd = MBB.end();
- if (B == MBBEnd)
- return true;
-
- MachineBasicBlock::const_iterator I = MBB.begin();
- for (; &*I != A && &*I != B; ++I)
- ;
-
- return &*I == A;
+static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A,
+ const MachineInstr &B) {
+ uint64_t IndexA, IndexB;
+ PosIndexes.getIndex(A, IndexA);
+ if (LLVM_UNLIKELY(PosIndexes.getIndex(B, IndexB)))
+ PosIndexes.getIndex(A, IndexA);
+ return IndexA < IndexB;
}
/// Returns false if \p VirtReg is known to not live out of the current block.
@@ -371,7 +470,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
} else {
- if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef))
+ if (!SelfLoopDef || dominates(PosIndexes, DefInst, *SelfLoopDef))
SelfLoopDef = &DefInst;
}
}
@@ -396,7 +495,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
// Try to handle some simple cases to avoid spilling and reloading every
// value inside a self looping block.
if (SelfLoopDef == &UseInst ||
- !dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) {
+ !dominates(PosIndexes, *SelfLoopDef, UseInst)) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
}
@@ -1565,6 +1664,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
+ PosIndexes.init(MBB);
RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");