aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2023-03-07 12:39:49 -0800
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2023-03-08 01:24:20 -0800
commit59162e38590fbe194e2f5dc11bcfc02bffeb75fc (patch)
treec7cba1f0950d3b1a0904655bcb7da0c9152bdc77 /llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
parent173e54c3677420e5774a120a4bc4208093bd5cc8 (diff)
downloadllvm-59162e38590fbe194e2f5dc11bcfc02bffeb75fc.zip
llvm-59162e38590fbe194e2f5dc11bcfc02bffeb75fc.tar.gz
llvm-59162e38590fbe194e2f5dc11bcfc02bffeb75fc.tar.bz2
[AMDGPU] Skip buffer_wbl2 before atomic fence acquire
Memory models for gfx90a and gfx940 do not require buffer_wbl2 before the fence for acquire ordering, but we do insert the full release. Fixes: SWDEV-386785 Differential Revision: https://reviews.llvm.org/D145524
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp9
1 files changed, 7 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 68e592b..47b81d7 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2209,8 +2209,13 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
bool Changed = false;
if (MOI.isAtomic()) {
- if (MOI.getOrdering() == AtomicOrdering::Acquire ||
- MOI.getOrdering() == AtomicOrdering::Release ||
+ if (MOI.getOrdering() == AtomicOrdering::Acquire)
+ Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
+ SIMemOp::LOAD | SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
+
+ if (MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
/// TODO: This relies on a barrier always generating a waitcnt