diff options
author | Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com> | 2025-07-30 11:23:28 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-30 11:23:28 +0530 |
commit | 8f187c74b3ad77ef8a15bc3d2d718ccd88edb873 (patch) | |
tree | fdaa1517640477bb7887fdf9591a87c857b6db38 /llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | |
parent | eddd34227ec2770c81d260826e2c31f4d5136f8f (diff) | |
download | llvm-8f187c74b3ad77ef8a15bc3d2d718ccd88edb873.zip llvm-8f187c74b3ad77ef8a15bc3d2d718ccd88edb873.tar.gz llvm-8f187c74b3ad77ef8a15bc3d2d718ccd88edb873.tar.bz2 |
[AMDGPU] introduce S_WAITCNT_LDS_DIRECT in the memory legalizer (#150887)
The new instruction is a placeholder for a wait whose counter value is not yet
known: it is emitted at a release operation to ensure that prior direct loads to
LDS (formerly called LDS DMA) have completed. SIInsertWaitcnts later replaces
the instruction with a wait that uses a suitable value for vmcnt().
Co-authored-by: Austin Kerbow <austin.kerbow@amd.com>
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 025731a..53f554e 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -1170,6 +1170,16 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, Changed = true; } + // On architectures that support direct loads to LDS, emit an unknown waitcnt + // at workgroup-scoped release operations that specify the LDS address space. + // SIInsertWaitcnts will later replace this with a vmcnt(). + if (ST.hasVMemToLDSLoad() && isReleaseOrStronger(Order) && + Scope == SIAtomicScope::WORKGROUP && + (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) { + BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_lds_direct)); + Changed = true; + } + if (Pos == Position::AFTER) --MI; @@ -2078,6 +2088,16 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI, Changed = true; } + // On architectures that support direct loads to LDS, emit an unknown waitcnt + // at workgroup-scoped release operations that specify the LDS address space. + // SIInsertWaitcnts will later replace this with a vmcnt(). + if (ST.hasVMemToLDSLoad() && isReleaseOrStronger(Order) && + Scope == SIAtomicScope::WORKGROUP && + (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) { + BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_lds_direct)); + Changed = true; + } + if (VSCnt) { BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft)) .addReg(AMDGPU::SGPR_NULL, RegState::Undef) |