aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
diff options
context:
space:
mode:
authorChristudasan Devadasan <christudasan.devadasan@amd.com>2024-01-24 07:08:43 +0530
committerGitHub <noreply@github.com>2024-01-24 07:08:43 +0530
commit230c13d59d0843c3b738920b85c341cc78a61fa9 (patch)
tree2b21ba0fb19a086167639d4a212b4d56d0e48e2c /llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
parent7e50f006f7f652b9a5ac5ddd64deba5f1c9388a8 (diff)
downloadllvm-230c13d59d0843c3b738920b85c341cc78a61fa9.zip
llvm-230c13d59d0843c3b738920b85c341cc78a61fa9.tar.gz
llvm-230c13d59d0843c3b738920b85c341cc78a61fa9.tar.bz2
[AMDGPU] Pick available high VGPR for CSR SGPR spilling (#78669)
CSR SGPR spilling currently uses the earliest available physical VGPRs. This imposes high register pressure when trying to allocate large VGPR tuples within the default register budget. This patch changes the spilling strategy to pick the VGPRs in reverse order, taking the highest available VGPR first, and then, after register allocation, to shift them back to the lowest available range. With that, the initial VGPRs remain available for allocation, and the chance of finding a large number of contiguous registers is higher.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp54
1 files changed, 43 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index e814224..b94d143 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -312,6 +312,33 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
return false;
}
+void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange( // Moves each VGPR recorded in SpillPhysVGPRs to a lower-numbered unused VGPR, when one is found.
+ MachineFunction &MF) {
+ const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned I = 0, E = SpillPhysVGPRs.size(); I < E; ++I) {
+ Register Reg = SpillPhysVGPRs[I];
+ Register NewReg =
+ TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); // Default search order; presumably yields the lowest unused VGPR - confirm against findUnusedRegister.
+ if (!NewReg || NewReg >= Reg) // Nothing unused, or the candidate does not compare below Reg, so moving would gain nothing.
+ break; // Stops the whole loop: the remaining SpillPhysVGPRs entries are left unchanged.
+
+ MRI.replaceRegWith(Reg, NewReg); // Switch register references tracked by MRI from Reg to NewReg.
+
+ // Update various tables with the new VGPR: the spill VGPR list, the WWM reserved set, and the WWMSpills entry, which is re-keyed from Reg to NewReg with its existing value.
+ SpillPhysVGPRs[I] = NewReg;
+ WWMReservedRegs.remove(Reg);
+ WWMReservedRegs.insert(NewReg);
+ WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg]));
+ WWMSpills.erase(Reg);
+
+ for (MachineBasicBlock &MBB : MF) { // Drop the old register from the live-in list of every block in the function.
+ MBB.removeLiveIn(Reg);
+ MBB.sortUniqueLiveIns();
+ }
+ }
+}
+
bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
MachineFunction &MF, int FI, unsigned LaneIndex) {
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -329,13 +356,17 @@ bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
}
bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
- MachineFunction &MF, int FI, unsigned LaneIndex) {
+ MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
Register LaneVGPR;
if (!LaneIndex) {
- LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+ // Find the highest available register if called before RA to ensure the
+ // lowest registers are available for allocation. The LaneVGPR, in that
+ // case, will be shifted back to the lowest range after VGPR allocation.
+ LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
+ !IsPrologEpilog);
if (LaneVGPR == AMDGPU::NoRegister) {
// We have no VGPRs left for spilling SGPRs. Reset because we will not
// partially spill the SGPR to VGPRs.
@@ -359,12 +390,12 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
return true;
}
-bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
- int FI,
- bool IsPrologEpilog) {
+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(
+ MachineFunction &MF, int FI, bool SpillToPhysVGPRLane,
+ bool IsPrologEpilog) {
std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
- IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI]
- : SGPRSpillsToVirtualVGPRLanes[FI];
+ SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
+ : SGPRSpillsToVirtualVGPRLanes[FI];
// This has already been allocated.
if (!SpillLanes.empty())
@@ -384,14 +415,15 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
"not spilling SGPRs to VGPRs");
- unsigned &NumSpillLanes =
- IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes;
+ unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
+ : NumVirtualVGPRSpillLanes;
for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
unsigned LaneIndex = (NumSpillLanes % WaveSize);
- bool Allocated = IsPrologEpilog
- ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex)
+ bool Allocated = SpillToPhysVGPRLane
+ ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
+ IsPrologEpilog)
: allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
if (!Allocated) {
NumSpillLanes -= I;