diff options
| author | Christudasan Devadasan <Christudasan.Devadasan@amd.com> | 2026-01-08 07:53:18 +0000 |
|---|---|---|
| committer | Christudasan Devadasan <Christudasan.Devadasan@amd.com> | 2026-01-13 03:50:24 +0000 |
| commit | 070b3e99ac751c9e6cb38201e0eec68d72a55542 (patch) | |
| tree | b798e5d2717b69aa89cccd322f5518d37f9e47c0 /llvm/lib/CodeGen | |
| parent | 12d1aa0c8430c9d8015bfb285aae7d5e260db8ad (diff) | |
| download | llvm-users/cdevadas/subreg-reload.zip llvm-users/cdevadas/subreg-reload.tar.gz llvm-users/cdevadas/subreg-reload.tar.bz2 | |
[InlineSpiller][AMDGPU] Implement subreg reload during RA spill
users/cdevadas/subreg-reload
Currently, when a virtual register is partially used, the
entire tuple is restored from the spilled location, even if
only a subset of its sub-registers is needed. This patch
introduces support for partial reloads by analyzing actual
register usage and restoring only the required sub-registers.
This improvement enhances register allocation efficiency,
particularly for cases involving tuple virtual registers.
For AMDGPU, this change brings considerable improvements
in workloads that involve matrix operations, large vectors,
and complex control flow.
Diffstat (limited to 'llvm/lib/CodeGen')
| -rw-r--r-- | llvm/lib/CodeGen/InlineSpiller.cpp | 57 |
1 files changed, 51 insertions, 6 deletions
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 6837030..c567b88f 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -217,7 +217,8 @@ private: bool coalesceStackAccess(MachineInstr *MI, Register Reg); bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>, MachineInstr *LoadMI = nullptr); - void insertReload(Register VReg, SlotIndex, MachineBasicBlock::iterator MI); + void insertReload(Register VReg, unsigned SubReg, SlotIndex, + MachineBasicBlock::iterator MI); void insertSpill(Register VReg, bool isKill, MachineBasicBlock::iterator MI); void spillAroundUses(Register Reg); @@ -1112,14 +1113,14 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, return true; } -void InlineSpiller::insertReload(Register NewVReg, +void InlineSpiller::insertReload(Register NewVReg, unsigned SubReg, SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); MachineInstrSpan MIS(MI, &MBB); TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, - MRI.getRegClass(NewVReg), Register()); + MRI.getRegClass(NewVReg), Register(), SubReg); LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); @@ -1248,10 +1249,51 @@ void InlineSpiller::spillAroundUses(Register Reg) { // Create a new virtual register for spill/fill. // FIXME: Infer regclass from instruction alone. - Register NewVReg = Edit->createFrom(Reg); + + unsigned SubReg = 0; + LaneBitmask CoveringLanes = LaneBitmask::getNone(); + // If the subreg liveness is enabled, identify the subreg use(s) to try + // subreg reload. Skip if the instruction also defines the register. + // For copy bundles, get the covering lane masks. 
+ if (MRI.subRegLivenessEnabled() && !RI.Writes) { + for (auto [MI, OpIdx] : Ops) { + const MachineOperand &MO = MI->getOperand(OpIdx); + assert(MO.isReg() && MO.getReg() == Reg); + if (MO.isUse()) { + SubReg = MO.getSubReg(); + if (SubReg) + CoveringLanes |= TRI.getSubRegIndexLaneMask(SubReg); + } + } + } + + if (MI.isBundled() && CoveringLanes.any()) { + CoveringLanes = LaneBitmask(bit_ceil(CoveringLanes.getAsInteger()) - 1); + // Obtain the covering subregister index, including any missing indices + // within the identified small range. Although this may be suboptimal due + // to gaps in the subregisters that are not part of the copy bundle, it is + // benificial when components outside this range of the original tuple can + // be completely skipped from the reload. + SubReg = TRI.getSubRegIdxFromLaneMask(CoveringLanes); + } + + // If the target doesn't support subreg reload, fallback to restoring the + // full tuple. + if (SubReg && !TRI.shouldEnableSubRegReload(SubReg)) + SubReg = 0; + + const TargetRegisterClass *OrigRC = MRI.getRegClass(Reg); + const TargetRegisterClass *NewRC = + SubReg ? TRI.getSubRegisterClass(OrigRC, SubReg) : nullptr; + + // Check if the target needs to constrain the RC further. + if (NewRC) + NewRC = TRI.getConstrainedRegClass(NewRC); + + Register NewVReg = Edit->createFrom(Reg, NewRC); if (RI.Reads) - insertReload(NewVReg, Idx, &MI); + insertReload(NewVReg, SubReg, Idx, &MI); // Rewrite instruction operands. bool hasLiveDef = false; @@ -1259,7 +1301,10 @@ void InlineSpiller::spillAroundUses(Register Reg) { MachineOperand &MO = OpPair.first->getOperand(OpPair.second); MO.setReg(NewVReg); if (MO.isUse()) { - if (!OpPair.first->isRegTiedToDefOperand(OpPair.second)) + if (SubReg && !MI.isBundled()) + MO.setSubReg(0); + if (!OpPair.first->isRegTiedToDefOperand(OpPair.second) || + (SubReg && !MI.isBundled())) MO.setIsKill(); } else { if (!MO.isDead()) |
