aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
author: Sirish Pande <sirpande@amd.com> 2023-10-18 08:43:29 -0500
committer: GitHub <noreply@github.com> 2023-10-18 08:43:29 -0500
commit: 28e4f97320b6d3cb198f9865b6379ea1ca988cf8 (patch)
tree: 4bb5ae0578796859adfc7e2b40979c6f28184715 /llvm/lib
parent: 18e5055db39b41e00dbeb7ca820dd82cce46f65e (diff)
downloadllvm-28e4f97320b6d3cb198f9865b6379ea1ca988cf8.zip
llvm-28e4f97320b6d3cb198f9865b6379ea1ca988cf8.tar.gz
llvm-28e4f97320b6d3cb198f9865b6379ea1ca988cf8.tar.bz2
[AMDGPU] Save/Restore SCC bit across waterfall loop. (#68363)
The waterfall loop overwrites the SCC bit of the status register. Make sure the SCC bit is saved before the waterfall loop and restored after it. The save/restore is needed only when SCC is live across the waterfall loop. Co-authored-by: Sirish Pande <sirish.pande@amd.com>
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 20
1 file changed, 19 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2ad0755..b2d3236 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6079,6 +6079,17 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+ // Save SCC. Waterfall Loop may overwrite SCC.
+ Register SaveSCCReg;
+ bool SCCNotDead = (MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI, 30) !=
+ MachineBasicBlock::LQR_Dead);
+ if (SCCNotDead) {
+ SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(MBB, Begin, DL, TII.get(AMDGPU::S_CSELECT_B32), SaveSCCReg)
+ .addImm(1)
+ .addImm(0);
+ }
+
Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
// Save the EXEC mask
@@ -6134,8 +6145,15 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
emitLoadScalarOpsFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, ScalarOps);
- // Restore the EXEC mask
MachineBasicBlock::iterator First = RemainderBB->begin();
+ // Restore SCC
+ if (SCCNotDead) {
+ BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_CMP_LG_U32))
+ .addReg(SaveSCCReg, RegState::Kill)
+ .addImm(0);
+ }
+
+ // Restore the EXEC mask
BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec);
return BodyBB;
}