aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
diff options
context:
space:
mode:
author: Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com> 2025-07-30 11:23:28 +0530
committer: GitHub <noreply@github.com> 2025-07-30 11:23:28 +0530
commit: 8f187c74b3ad77ef8a15bc3d2d718ccd88edb873 (patch)
tree: fdaa1517640477bb7887fdf9591a87c857b6db38 /llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
parent: eddd34227ec2770c81d260826e2c31f4d5136f8f (diff)
download: llvm-8f187c74b3ad77ef8a15bc3d2d718ccd88edb873.zip
llvm-8f187c74b3ad77ef8a15bc3d2d718ccd88edb873.tar.gz
llvm-8f187c74b3ad77ef8a15bc3d2d718ccd88edb873.tar.bz2
[AMDGPU] introduce S_WAITCNT_LDS_DIRECT in the memory legalizer (#150887)
The new instruction stands for the not-yet-known wait count required at a release operation to ensure that prior direct loads to LDS (formerly called LDS DMA) have completed. SIInsertWaitcnts later replaces the instruction with a suitable value for vmcnt().
Co-authored-by: Austin Kerbow <austin.kerbow@amd.com>
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp 20
1 file changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 025731a..53f554e 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -1170,6 +1170,16 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
Changed = true;
}
+ // On architectures that support direct loads to LDS, emit an unknown waitcnt
+ // at workgroup-scoped release operations that specify the LDS address space.
+ // SIInsertWaitcnts will later replace this with a vmcnt().
+ if (ST.hasVMemToLDSLoad() && isReleaseOrStronger(Order) &&
+ Scope == SIAtomicScope::WORKGROUP &&
+ (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_lds_direct));
+ Changed = true;
+ }
+
if (Pos == Position::AFTER)
--MI;
@@ -2078,6 +2088,16 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
Changed = true;
}
+ // On architectures that support direct loads to LDS, emit an unknown waitcnt
+ // at workgroup-scoped release operations that specify the LDS address space.
+ // SIInsertWaitcnts will later replace this with a vmcnt().
+ if (ST.hasVMemToLDSLoad() && isReleaseOrStronger(Order) &&
+ Scope == SIAtomicScope::WORKGROUP &&
+ (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_lds_direct));
+ Changed = true;
+ }
+
if (VSCnt) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)