about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
diff options
context:
space:
mode:
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2025-09-24 10:04:06 -0700
committer: GitHub <noreply@github.com> 2025-09-24 10:04:06 -0700
commit: 1becadeebc76db49300a74666c846047d027733e (patch)
tree: 76c8f1814ef7c20fb08605724397630719fda3b1 /llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
parent: 844150de8ab7b28e1de5e003335b6f39cc671b70 (diff)
download: llvm-1becadeebc76db49300a74666c846047d027733e.zip
llvm-1becadeebc76db49300a74666c846047d027733e.tar.gz
llvm-1becadeebc76db49300a74666c846047d027733e.tar.bz2
[AMDGPU] Update comments in memory legalizer. NFC (#160453)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp 19
1 file changed, 14 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 27dc4ea..484861d 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -106,6 +106,7 @@ private:
bool IsLastUse = false;
bool IsCooperative = false;
+ // TODO: Should we assume Cooperative=true if no MMO is present?
SIMemOpInfo(
const GCNSubtarget &ST,
AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
@@ -338,6 +339,11 @@ public:
bool IsNonTemporal,
bool IsLastUse = false) const = 0;
+ /// Add final touches to a `mayStore` instruction \p MI, which may be a
+ /// Store or RMW instruction.
+ /// FIXME: This takes a MI because iterators aren't handled properly. When
+ /// this is called, they often point to entirely different insts. Thus we back
+ /// up the inst early and pass it here instead.
virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
return false;
};
@@ -2381,7 +2387,10 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// which shares the same L0.
//
// GFX12.5:
- // TODO DOCS
+ // CU$ has two ports. To ensure operations are visible at the workgroup
+ // level, we need to ensure all operations in this port have completed
+ // so the other SIMDs in the WG can see them. There is no ordering
+ // guarantee between the ports.
if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
LOADCnt |= true;
@@ -2496,8 +2505,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
// Otherwise in CU mode all waves of a work-group are on the same CU, and
// so the L0 does not need to be invalidated.
//
- // GFX12.5
- // TODO DOCS
+ // GFX12.5 has a shared WGP$, so no invalidates are required.
if (ST.isCuModeEnabled())
return false;
@@ -2541,7 +2549,8 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
++MI;
// global_wb is only necessary at system scope for GFX12.0,
- // they're also necessary at device scope for GFX12.5.
+ // they're also necessary at device scope for GFX12.5 as stores
+ // cannot report completion earlier than L2.
//
// Emitting it for lower scopes is a slow no-op, so we omit it
// for performance.
@@ -2552,7 +2561,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
Changed = true;
break;
case SIAtomicScope::AGENT:
- // TODO DOCS
+ // GFX12.5 may have >1 L2 per device so we must emit a device scope WB.
if (ST.hasGFX1250Insts()) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
.addImm(AMDGPU::CPol::SCOPE_DEV);