| author | Christudasan Devadasan <christudasan.devadasan@amd.com> | 2024-01-24 07:08:43 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-01-24 07:08:43 +0530 |
| commit | 230c13d59d0843c3b738920b85c341cc78a61fa9 (patch) | |
| tree | 2b21ba0fb19a086167639d4a212b4d56d0e48e2c /llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | |
| parent | 7e50f006f7f652b9a5ac5ddd64deba5f1c9388a8 (diff) | |
[AMDGPU] Pick available high VGPR for CSR SGPR spilling (#78669)
CSR SGPR spilling currently uses the earliest (lowest-numbered) available physical VGPRs.
This imposes high register pressure when the allocator tries to form large VGPR tuples
within the default register budget.
This patch changes the spilling strategy to pick VGPRs in reverse order, taking the
highest available VGPR first, and to shift them back down to the lowest available range
after register allocation. With that, the low VGPRs remain free during allocation, and
the chance of finding a long run of contiguous registers is much higher.
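To make the effect concrete, here is a small standalone sketch (not taken from LLVM; the register counts, tuple size, and helper names are invented for illustration). It models a 256-entry VGPR file with a per-function budget of 32 VGPRs, reserves spill VGPRs either from the bottom (the old behaviour) or from the top (this patch), and checks whether a 32-wide tuple such as v[0:31] still fits inside the budget:

```cpp
// Standalone illustration of the strategy; not LLVM code. "Registers" are
// plain indices 0..NumVGPRs-1 and all numbers are made up for this example.
#include <cstdio>
#include <vector>

// Returns true if a run of TupleSize consecutive unreserved registers fits
// entirely below Budget.
static bool tupleFitsInBudget(const std::vector<bool> &Reserved, int Budget,
                              int TupleSize) {
  for (int Start = 0; Start + TupleSize <= Budget; ++Start) {
    bool Ok = true;
    for (int R = Start; R < Start + TupleSize; ++R)
      Ok &= !Reserved[R];
    if (Ok)
      return true;
  }
  return false;
}

int main() {
  const int NumVGPRs = 256, Budget = 32, TupleSize = 32, NumSpills = 4;

  // Old behaviour: CSR SGPR spills grab the lowest unused VGPRs (v0..v3),
  // so a 32-wide tuple can no longer fit inside the 32-VGPR budget.
  std::vector<bool> LowPick(NumVGPRs, false);
  for (int I = 0; I < NumSpills; ++I)
    LowPick[I] = true;

  // This patch: pick from the top of the file before RA; the spill VGPRs are
  // renamed down to the lowest unused range only after regalloc has run.
  std::vector<bool> HighPick(NumVGPRs, false);
  for (int I = 0; I < NumSpills; ++I)
    HighPick[NumVGPRs - 1 - I] = true;

  std::printf("tuple fits, low picks:  %s\n",
              tupleFitsInBudget(LowPick, Budget, TupleSize) ? "yes" : "no");
  std::printf("tuple fits, high picks: %s\n",
              tupleFitsInBudget(HighPick, Budget, TupleSize) ? "yes" : "no");
  return 0;
}
```

With the low picks the tuple no longer fits; with the high picks it does, which is the situation the patch's post-RA shift-back step then cleans up.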
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 54 |
1 file changed, 43 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index e814224..b94d143 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -312,6 +312,33 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
   return false;
 }
 
+void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange(
+    MachineFunction &MF) {
+  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  for (unsigned I = 0, E = SpillPhysVGPRs.size(); I < E; ++I) {
+    Register Reg = SpillPhysVGPRs[I];
+    Register NewReg =
+        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+    if (!NewReg || NewReg >= Reg)
+      break;
+
+    MRI.replaceRegWith(Reg, NewReg);
+
+    // Update various tables with the new VGPR.
+    SpillPhysVGPRs[I] = NewReg;
+    WWMReservedRegs.remove(Reg);
+    WWMReservedRegs.insert(NewReg);
+    WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg]));
+    WWMSpills.erase(Reg);
+
+    for (MachineBasicBlock &MBB : MF) {
+      MBB.removeLiveIn(Reg);
+      MBB.sortUniqueLiveIns();
+    }
+  }
+}
+
 bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
     MachineFunction &MF, int FI, unsigned LaneIndex) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -329,13 +356,17 @@ bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
 }
 
 bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
-    MachineFunction &MF, int FI, unsigned LaneIndex) {
+    MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   Register LaneVGPR;
   if (!LaneIndex) {
-    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+    // Find the highest available register if called before RA to ensure the
+    // lowest registers are available for allocation. The LaneVGPR, in that
+    // case, will be shifted back to the lowest range after VGPR allocation.
+    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
+                                       !IsPrologEpilog);
     if (LaneVGPR == AMDGPU::NoRegister) {
       // We have no VGPRs left for spilling SGPRs. Reset because we will not
       // partially spill the SGPR to VGPRs.
@@ -359,12 +390,12 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
   return true;
 }
 
-bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
-                                                        int FI,
-                                                        bool IsPrologEpilog) {
+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(
+    MachineFunction &MF, int FI, bool SpillToPhysVGPRLane,
+    bool IsPrologEpilog) {
   std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
-      IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI]
-                     : SGPRSpillsToVirtualVGPRLanes[FI];
+      SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
+                          : SGPRSpillsToVirtualVGPRLanes[FI];
 
   // This has already been allocated.
   if (!SpillLanes.empty())
@@ -384,14 +415,15 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
   assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
          "not spilling SGPRs to VGPRs");
 
-  unsigned &NumSpillLanes =
-      IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes;
+  unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
+                                                : NumVirtualVGPRSpillLanes;
 
   for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
     unsigned LaneIndex = (NumSpillLanes % WaveSize);
-    bool Allocated = IsPrologEpilog
-                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex)
+    bool Allocated = SpillToPhysVGPRLane
+                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
+                                                             IsPrologEpilog)
                          : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
     if (!Allocated) {
       NumSpillLanes -= I;
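For reference, here is a sketch of how the two boolean parameters of the new allocateSGPRSpillToVGPRLane signature are intended to combine, based only on the signatures and comments in this diff; the wrapper function below and its name are hypothetical, and the real call sites live elsewhere in the AMDGPU backend and may pass these flags differently:

```cpp
// Hypothetical illustration only, not code from this patch.
// SpillToPhysVGPRLane selects the physical-VGPR spill lanes used for CSR
// spills; IsPrologEpilog tells the allocator whether regalloc has already run.
#include "SIMachineFunctionInfo.h"

static bool spillCSRSGPRToLane(llvm::MachineFunction &MF,
                               llvm::SIMachineFunctionInfo &FuncInfo, int FI,
                               bool CalledFromPrologEpilog) {
  // Before RA (CalledFromPrologEpilog == false) the lane VGPR is taken from
  // the top of the register file and later renamed down by
  // shiftSpillPhysVGPRsToLowestRange(); after RA it is taken as-is.
  return FuncInfo.allocateSGPRSpillToVGPRLane(
      MF, FI, /*SpillToPhysVGPRLane=*/true,
      /*IsPrologEpilog=*/CalledFromPrologEpilog);
}
```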