aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp47
1 files changed, 47 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index f62e808..4069a36 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -312,6 +312,10 @@ public:
SIMemOp Op, bool IsVolatile,
bool IsNonTemporal) const = 0;
+ virtual bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const {
+ return false;
+ };
+
/// Inserts any necessary instructions at position \p Pos relative
/// to instruction \p MI to ensure memory instructions before \p Pos of kind
/// \p Op associated with address spaces \p AddrSpace have completed. Used
@@ -589,6 +593,15 @@ protected:
bool setScope(const MachineBasicBlock::iterator MI,
AMDGPU::CPol::CPol Value) const;
+ // Stores with system scope (SCOPE_SYS) need to wait for:
+ // - loads or atomics(returning) - wait for {LOAD|SAMPLE|BVH|KM}CNT==0
+ // - non-returning-atomics - wait for STORECNT==0
+ // TODO: SIInsertWaitcnts will not always be able to remove STORECNT waits
+ // since it does not distinguish atomics-with-return from regular stores.
+ // There is no need to wait if memory is cached (mtype != UC).
+ bool
+ insertWaitsBeforeSystemScopeStore(const MachineBasicBlock::iterator MI) const;
+
public:
SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {}
@@ -603,6 +616,8 @@ public:
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsVolatile,
bool IsNonTemporal) const override;
+
+ bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;
};
class SIMemoryLegalizer final : public MachineFunctionPass {
@@ -2194,6 +2209,22 @@ bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
return false;
}
+bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
+ const MachineBasicBlock::iterator MI) const {
+ // TODO: implement flag for frontend to give us a hint not to insert waits.
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ const DebugLoc &DL = MI->getDebugLoc();
+
+ BuildMI(MBB, MI, DL, TII->get(S_WAIT_LOADCNT_soft)).addImm(0);
+ BuildMI(MBB, MI, DL, TII->get(S_WAIT_SAMPLECNT_soft)).addImm(0);
+ BuildMI(MBB, MI, DL, TII->get(S_WAIT_BVHCNT_soft)).addImm(0);
+ BuildMI(MBB, MI, DL, TII->get(S_WAIT_KMCNT_soft)).addImm(0);
+ BuildMI(MBB, MI, DL, TII->get(S_WAIT_STORECNT_soft)).addImm(0);
+
+ return true;
+}
+
bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
@@ -2364,6 +2395,9 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
if (IsVolatile) {
Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
+ if (Op == SIMemOp::STORE)
+ Changed |= insertWaitsBeforeSystemScopeStore(MI);
+
// Ensure operation has completed at system scope to cause all volatile
// operations to be visible outside the program in a global order. Do not
// request cross address space as only the global address space can be
@@ -2381,6 +2415,15 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
return Changed;
}
+bool SIGfx12CacheControl::expandSystemScopeStore(
+ MachineBasicBlock::iterator &MI) const {
+ MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
+ if (CPol && ((CPol->getImm() & CPol::SCOPE) == CPol::SCOPE_SYS))
+ return insertWaitsBeforeSystemScopeStore(MI);
+
+ return false;
+}
+
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
if (AtomicPseudoMIs.empty())
return false;
@@ -2467,6 +2510,10 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
Changed |= CC->enableVolatileAndOrNonTemporal(
MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
MOI.isNonTemporal());
+
+ // GFX12 specific, scope(desired coherence domain in cache hierarchy) is
+ // instruction field, do not confuse it with atomic scope.
+ Changed |= CC->expandSystemScopeStore(MI);
return Changed;
}