aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
diff options
context:
space:
mode:
authorDiana Picus <Diana-Magda.Picus@amd.com>2025-04-23 10:33:36 +0200
committerGitHub <noreply@github.com>2025-04-23 10:33:36 +0200
commit4a58071d87265dfccba72134b25cf4d1595d98c5 (patch)
treefd42f47cb7fc3f3b59dff523142160bf43661c09 /llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
parent6db447f824d46956172b104f08105b25f9428f55 (diff)
downloadllvm-4a58071d87265dfccba72134b25cf4d1595d98c5.zip
llvm-4a58071d87265dfccba72134b25cf4d1595d98c5.tar.gz
llvm-4a58071d87265dfccba72134b25cf4d1595d98c5.tar.bz2
[AMDGPU] Support block load/store for CSR (#130013)
Add support for using the existing `SCRATCH_STORE_BLOCK` and `SCRATCH_LOAD_BLOCK` instructions for saving and restoring callee-saved VGPRs. This is controlled by a new subtarget feature, `block-vgpr-csr`. It does not include WWM registers - those will be saved and restored individually, just like before. This patch does not change the ABI. Use of this feature may lead to slightly increased stack usage, because the memory is not compacted if certain registers don't have to be transferred (this will happen in practice for calling conventions where the callee and caller saved registers are interleaved in groups of 8). However, if the registers at the end of the block of 32 don't have to be transferred, we don't need to use a whole 128-byte stack slot - we can trim some space off the end of the range. In order to implement this feature, we need to rely less on the target-independent code in the PrologEpilogInserter, so we override several new methods in `SIFrameLowering`. We also add new pseudos, `SI_BLOCK_SPILL_V1024_SAVE/RESTORE`. One peculiarity is that both the `SI_BLOCK_SPILL_V1024_RESTORE` pseudo and the `SCRATCH_LOAD_BLOCK` instructions will have all the registers that are not transferred added as implicit uses. This is done in order to inform LiveRegUnits that those registers are not available before the restore (since we're not really restoring them - so we can't afford to scavenge them). Unfortunately, this trick doesn't work with the save, so before the save all the registers in the block will be unavailable (see the unit test).
Diffstat (limited to 'llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp')
-rw-r--r--llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp35
1 files changed, 35 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index be73b73..70c3b2c 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
@@ -182,3 +183,37 @@ TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF).id()}};
}
+
+/// Emit the save of the single callee-saved register described by \p CS at
+/// insertion point \p MI of \p SaveBlock.
+///
+/// When \p CS reports that the register is spilled to another register, a
+/// COPY into CS.getDstReg() is built and the source is marked killed.
+/// Otherwise the register is stored to the frame index recorded in \p CS via
+/// TargetInstrInfo::storeRegToStackSlot, using the minimal physical register
+/// class that \p TRI reports for it.
+void TargetFrameLowering::spillCalleeSavedRegister(
+    MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI,
+    const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
+    const TargetRegisterInfo *TRI) const {
+  const MCRegister SavedReg = CS.getReg();
+
+  // Stack-slot case: hand the store over to the target's instruction info.
+  if (!CS.isSpilledToReg()) {
+    const TargetRegisterClass *SpillRC = TRI->getMinimalPhysRegClass(SavedReg);
+    TII->storeRegToStackSlot(SaveBlock, MI, SavedReg, true, CS.getFrameIdx(),
+                             SpillRC, TRI, Register());
+    return;
+  }
+
+  // Register case: a plain COPY into the destination register is enough.
+  auto SaveCopy = BuildMI(SaveBlock, MI, DebugLoc(),
+                          TII->get(TargetOpcode::COPY), CS.getDstReg());
+  SaveCopy.addReg(SavedReg, getKillRegState(true));
+}
+
+/// Emit the restore of the single callee-saved register described by \p CS at
+/// insertion point \p MI of \p MBB.
+///
+/// When \p CS reports that the register was spilled to another register, a
+/// COPY from CS.getDstReg() back into the original register is built and the
+/// source is marked killed. Otherwise the register is reloaded from the frame
+/// index recorded in \p CS via TargetInstrInfo::loadRegFromStackSlot, using
+/// the minimal physical register class that \p TRI reports for it.
+void TargetFrameLowering::restoreCalleeSavedRegister(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
+    const TargetRegisterInfo *TRI) const {
+  const MCRegister RestoredReg = CS.getReg();
+
+  // Stack-slot case: hand the reload over to the target's instruction info.
+  if (!CS.isSpilledToReg()) {
+    const TargetRegisterClass *ReloadRC =
+        TRI->getMinimalPhysRegClass(RestoredReg);
+    TII->loadRegFromStackSlot(MBB, MI, RestoredReg, CS.getFrameIdx(), ReloadRC,
+                              TRI, Register());
+    // Sanity check that the target hook actually emitted something.
+    assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
+    return;
+  }
+
+  // Register case: copy the value back out of the register that held it.
+  auto RestoreCopy =
+      BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY), RestoredReg);
+  RestoreCopy.addReg(CS.getDstReg(), getKillRegState(true));
+}