diff options
| author | Sirish Pande <sirpande@amd.com> | 2023-10-18 08:43:29 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-10-18 08:43:29 -0500 |
| commit | 28e4f97320b6d3cb198f9865b6379ea1ca988cf8 (patch) | |
| tree | 4bb5ae0578796859adfc7e2b40979c6f28184715 /llvm/lib | |
| parent | 18e5055db39b41e00dbeb7ca820dd82cce46f65e (diff) | |
| download | llvm-28e4f97320b6d3cb198f9865b6379ea1ca988cf8.zip llvm-28e4f97320b6d3cb198f9865b6379ea1ca988cf8.tar.gz llvm-28e4f97320b6d3cb198f9865b6379ea1ca988cf8.tar.bz2 | |
[AMDGPU] Save/Restore SCC bit across waterfall loop. (#68363)
The waterfall loop overwrites the SCC bit of the status register. Make sure
the SCC bit is saved before the loop and restored after it.
We need to save/restore only in cases where SCC is live across the waterfall
loop.
Co-authored-by: Sirish Pande <sirish.pande@amd.com>
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 20 |
1 files changed, 19 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 2ad0755..b2d3236 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6079,6 +6079,17 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); + // Save SCC. Waterfall Loop may overwrite SCC. + Register SaveSCCReg; + bool SCCNotDead = (MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI, 30) != + MachineBasicBlock::LQR_Dead); + if (SCCNotDead) { + SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(MBB, Begin, DL, TII.get(AMDGPU::S_CSELECT_B32), SaveSCCReg) + .addImm(1) + .addImm(0); + } + Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); // Save the EXEC mask @@ -6134,8 +6145,15 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, emitLoadScalarOpsFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, ScalarOps); - // Restore the EXEC mask MachineBasicBlock::iterator First = RemainderBB->begin(); + // Restore SCC + if (SCCNotDead) { + BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_CMP_LG_U32)) + .addReg(SaveSCCReg, RegState::Kill) + .addImm(0); + } + + // Restore the EXEC mask BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec); return BodyBB; } |
