aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll313
1 files changed, 115 insertions, 198 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
index f1879f2..d7f780e 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -43,8 +43,7 @@ define amdgpu_kernel void @atomic_add_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_add_u64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -101,8 +100,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_add_u64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -166,8 +164,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64_offset(ptr %out, i64 %in, i64 %
; GFX12-NEXT: flat_atomic_add_u64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -230,8 +227,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(ptr %out, ptr %out2,
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_add_u64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -279,8 +275,7 @@ define amdgpu_kernel void @atomic_add_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_add_u64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile add ptr %out, i64 %in syncscope("agent") seq_cst
@@ -332,8 +327,7 @@ define amdgpu_kernel void @atomic_add_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_add_u64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -392,8 +386,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_add_u64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -451,8 +444,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_add_u64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -503,8 +495,7 @@ define amdgpu_kernel void @atomic_and_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_and_b64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -561,8 +552,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_and_b64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -626,8 +616,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64_offset(ptr %out, i64 %in, i64 %
; GFX12-NEXT: flat_atomic_and_b64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -690,8 +679,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(ptr %out, ptr %out2,
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_and_b64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -739,8 +727,7 @@ define amdgpu_kernel void @atomic_and_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_and_b64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile and ptr %out, i64 %in syncscope("agent") seq_cst
@@ -792,8 +779,7 @@ define amdgpu_kernel void @atomic_and_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_and_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -852,8 +838,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_and_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -911,8 +896,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_and_b64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -963,8 +947,7 @@ define amdgpu_kernel void @atomic_sub_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_sub_u64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -1021,8 +1004,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_sub_u64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -1086,8 +1068,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64_offset(ptr %out, i64 %in, i64 %
; GFX12-NEXT: flat_atomic_sub_u64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -1150,8 +1131,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(ptr %out, ptr %out2,
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_sub_u64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -1199,8 +1179,7 @@ define amdgpu_kernel void @atomic_sub_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_sub_u64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile sub ptr %out, i64 %in syncscope("agent") seq_cst
@@ -1252,8 +1231,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_sub_u64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -1312,8 +1290,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_sub_u64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -1371,8 +1348,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_sub_u64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -1421,7 +1397,7 @@ define amdgpu_kernel void @atomic_max_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_max_i64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -1478,7 +1454,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_max_i64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -1540,7 +1516,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
; GFX12-NEXT: flat_atomic_max_i64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -1603,7 +1579,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_max_i64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -1649,7 +1625,7 @@ define amdgpu_kernel void @atomic_max_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_max_i64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile max ptr %out, i64 %in syncscope("workgroup") seq_cst
@@ -1701,7 +1677,7 @@ define amdgpu_kernel void @atomic_max_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_max_i64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -1758,7 +1734,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_max_i64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -1816,7 +1792,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_max_i64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -1865,7 +1841,7 @@ define amdgpu_kernel void @atomic_umax_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_max_u64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -1922,7 +1898,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_offset(ptr %out, ptr %out2, i64 %
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_max_u64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -1984,7 +1960,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
; GFX12-NEXT: flat_atomic_max_u64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -2047,7 +2023,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_max_u64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -2093,7 +2069,7 @@ define amdgpu_kernel void @atomic_umax_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_max_u64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile umax ptr %out, i64 %in syncscope("workgroup") seq_cst
@@ -2145,7 +2121,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_max_u64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -2202,7 +2178,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_max_u64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -2260,7 +2236,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_max_u64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -2309,7 +2285,7 @@ define amdgpu_kernel void @atomic_min_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_min_i64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -2366,7 +2342,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_min_i64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -2428,7 +2404,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
; GFX12-NEXT: flat_atomic_min_i64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -2491,7 +2467,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_min_i64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -2537,7 +2513,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_min_i64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile min ptr %out, i64 %in syncscope("workgroup") seq_cst
@@ -2589,7 +2565,7 @@ define amdgpu_kernel void @atomic_min_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_min_i64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -2646,7 +2622,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_min_i64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -2704,7 +2680,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_min_i64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -2753,7 +2729,7 @@ define amdgpu_kernel void @atomic_umin_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_min_u64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -2810,7 +2786,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_offset(ptr %out, ptr %out2, i64 %
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_min_u64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -2872,7 +2848,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64_offset(ptr %out, i64 %in, i64
; GFX12-NEXT: flat_atomic_min_u64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -2935,7 +2911,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(ptr %out, ptr %out2
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_min_u64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -2981,7 +2957,7 @@ define amdgpu_kernel void @atomic_umin_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_min_u64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile umin ptr %out, i64 %in syncscope("workgroup") seq_cst
@@ -3033,7 +3009,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_min_u64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -3090,7 +3066,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_min_u64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -3148,7 +3124,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64(ptr %out, ptr %out2, i64 %
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_min_u64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -3199,8 +3175,7 @@ define amdgpu_kernel void @atomic_or_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_or_b64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -3257,8 +3232,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_offset(ptr %out, ptr %out2, i64 %in
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_or_b64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -3322,8 +3296,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64_offset(ptr %out, i64 %in, i64 %i
; GFX12-NEXT: flat_atomic_or_b64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -3386,8 +3359,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(ptr %out, ptr %out2,
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_or_b64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -3435,8 +3407,7 @@ define amdgpu_kernel void @atomic_or_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_or_b64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile or ptr %out, i64 %in syncscope("agent") seq_cst
@@ -3488,8 +3459,7 @@ define amdgpu_kernel void @atomic_or_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_or_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -3548,8 +3518,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64(ptr %out, i64 %in, i64 %index) {
; GFX12-NEXT: flat_atomic_or_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -3607,8 +3576,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64(ptr %out, ptr %out2, i64 %in
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_or_b64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -3659,8 +3627,7 @@ define amdgpu_kernel void @atomic_xchg_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -3708,8 +3675,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr %out, double %in) {
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr double, ptr %out, i64 4
@@ -3757,8 +3723,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr %out, ptr %in) {
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr ptr, ptr %out, i32 4
@@ -3815,8 +3780,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_offset(ptr %out, ptr %out2, i64 %
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -3880,8 +3844,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(ptr %out, i64 %in, i64
; GFX12-NEXT: flat_atomic_swap_b64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -3944,8 +3907,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(ptr %out, ptr %out2
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -3993,8 +3955,7 @@ define amdgpu_kernel void @atomic_xchg_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile xchg ptr %out, i64 %in syncscope("agent") seq_cst
@@ -4046,8 +4007,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -4106,8 +4066,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_swap_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -4165,8 +4124,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(ptr %out, ptr %out2, i64 %
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -4217,8 +4175,7 @@ define amdgpu_kernel void @atomic_xor_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_xor_b64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -4275,8 +4232,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_xor_b64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -4340,8 +4296,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64_offset(ptr %out, i64 %in, i64 %
; GFX12-NEXT: flat_atomic_xor_b64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -4404,8 +4359,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(ptr %out, ptr %out2,
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_xor_b64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -4453,8 +4407,7 @@ define amdgpu_kernel void @atomic_xor_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_xor_b64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile xor ptr %out, i64 %in syncscope("agent") seq_cst
@@ -4506,8 +4459,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_xor_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -4566,8 +4518,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_xor_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -4625,8 +4576,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_xor_b64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -4678,8 +4628,7 @@ define amdgpu_kernel void @atomic_load_i64_offset(ptr %in, ptr %out) {
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_load_b64 v[0:1], v[0:1] offset:32 th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
@@ -4726,8 +4675,7 @@ define amdgpu_kernel void @atomic_load_i64(ptr %in, ptr %out) {
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_load_b64 v[0:1], v[0:1] th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
@@ -4790,8 +4738,7 @@ define amdgpu_kernel void @atomic_load_i64_addr64_offset(ptr %in, ptr %out, i64
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: flat_load_b64 v[0:1], v[0:1] offset:32 th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
@@ -4852,8 +4799,7 @@ define amdgpu_kernel void @atomic_load_i64_addr64(ptr %in, ptr %out, i64 %index)
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: flat_load_b64 v[0:1], v[0:1] th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
@@ -5094,8 +5040,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -5152,8 +5097,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:3] offset:72000
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 9000
@@ -5211,8 +5155,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[0:1], v[4:5], v[0:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -5277,8 +5220,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -5349,8 +5291,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
; GFX12-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[0:1], v[4:5], v[0:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -5408,8 +5349,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) {
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
@@ -5462,8 +5402,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr %out, ptr %out2, i64 %in,
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[0:1], v[4:5], v[0:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -5523,8 +5462,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -5590,8 +5528,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
; GFX12-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[0:1], v[4:5], v[0:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -5644,8 +5581,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) {
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_load_b64 v[0:1], v[0:1] offset:32 th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
@@ -5692,8 +5628,7 @@ define amdgpu_kernel void @atomic_load_f64(ptr %in, ptr %out) {
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_load_b64 v[0:1], v[0:1] th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
@@ -5756,8 +5691,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: flat_load_b64 v[0:1], v[0:1] offset:32 th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
@@ -5818,8 +5752,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index)
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: flat_load_b64 v[0:1], v[0:1] th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
entry:
@@ -6051,8 +5984,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -6109,8 +6041,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -6174,8 +6105,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 %
; GFX12-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -6238,8 +6168,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2,
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -6287,8 +6216,7 @@ define amdgpu_kernel void @atomic_inc_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile uinc_wrap ptr %out, i64 %in syncscope("agent") seq_cst
@@ -6340,8 +6268,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -6400,8 +6327,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -6459,8 +6385,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -6511,8 +6436,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr %out, i64 4
@@ -6569,8 +6493,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_dec_u64 v[0:1], v[0:1], v[2:3] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -6634,8 +6557,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 %
; GFX12-NEXT: flat_atomic_dec_u64 v[2:3], v[0:1] offset:32
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -6698,8 +6620,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2,
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3], v[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -6747,8 +6668,7 @@ define amdgpu_kernel void @atomic_dec_i64(ptr %out, i64 %in) {
; GFX12-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile udec_wrap ptr %out, i64 %in syncscope("agent") seq_cst
@@ -6800,8 +6720,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret(ptr %out, ptr %out2, i64 %in) {
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX12-NEXT: flat_atomic_dec_u64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
@@ -6860,8 +6779,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index)
; GFX12-NEXT: flat_atomic_dec_u64 v[2:3], v[0:1]
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr %out, i64 %index
@@ -6919,8 +6837,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %i
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3], v[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm