aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll277
1 files changed, 103 insertions, 174 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
index 3d11c8b..325dae1 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -51,8 +51,7 @@ define amdgpu_kernel void @atomic_add_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_add_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -123,8 +122,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_add_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -202,8 +200,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64_offset(ptr addrspace(1) %out, i
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_add_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -284,8 +281,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(ptr addrspace(1) %ou
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_add_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -349,8 +345,7 @@ define amdgpu_kernel void @atomic_add_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_add_u64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile add ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
@@ -420,8 +415,7 @@ define amdgpu_kernel void @atomic_add_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_add_u64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -496,8 +490,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_add_u64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -575,8 +568,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_add_u64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -635,8 +627,7 @@ define amdgpu_kernel void @atomic_and_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_and_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -707,8 +698,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_and_b64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -786,8 +776,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64_offset(ptr addrspace(1) %out, i
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_and_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -868,8 +857,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(ptr addrspace(1) %ou
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_and_b64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -933,8 +921,7 @@ define amdgpu_kernel void @atomic_and_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_and_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile and ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
@@ -1004,8 +991,7 @@ define amdgpu_kernel void @atomic_and_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_and_b64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1080,8 +1066,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_and_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -1159,8 +1144,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_and_b64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1219,8 +1203,7 @@ define amdgpu_kernel void @atomic_sub_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_sub_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -1291,8 +1274,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1370,8 +1352,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64_offset(ptr addrspace(1) %out, i
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_sub_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -1452,8 +1433,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(ptr addrspace(1) %ou
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1517,8 +1497,7 @@ define amdgpu_kernel void @atomic_sub_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_sub_u64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile sub ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
@@ -1588,8 +1567,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1664,8 +1642,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_sub_u64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -1743,8 +1720,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1797,7 +1773,7 @@ define amdgpu_kernel void @atomic_max_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_max_i64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -1865,7 +1841,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_max_i64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1937,7 +1913,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr addrspace(1) %out, i
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_max_i64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -2015,7 +1991,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr addrspace(1) %ou
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_max_i64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2073,7 +2049,7 @@ define amdgpu_kernel void @atomic_max_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_max_i64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile max ptr addrspace(1) %out, i64 %in syncscope("workgroup") seq_cst
@@ -2140,7 +2116,7 @@ define amdgpu_kernel void @atomic_max_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_max_i64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2209,7 +2185,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_max_i64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -2284,7 +2260,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_max_i64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2337,7 +2313,7 @@ define amdgpu_kernel void @atomic_umax_i64_offset(ptr addrspace(1) %out, i64 %in
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_max_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -2405,7 +2381,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_max_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2477,7 +2453,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr addrspace(1) %out,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_max_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -2555,7 +2531,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr addrspace(1) %o
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_max_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2613,7 +2589,7 @@ define amdgpu_kernel void @atomic_umax_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_max_u64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile umax ptr addrspace(1) %out, i64 %in syncscope("workgroup") seq_cst
@@ -2680,7 +2656,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret(ptr addrspace(1) %out, ptr addrsp
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_max_u64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2749,7 +2725,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64(ptr addrspace(1) %out, i64 %in
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_max_u64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -2824,7 +2800,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_max_u64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2877,7 +2853,7 @@ define amdgpu_kernel void @atomic_min_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_min_i64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -2945,7 +2921,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_min_i64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -3017,7 +2993,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr addrspace(1) %out, i
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_min_i64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -3095,7 +3071,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr addrspace(1) %ou
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_min_i64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -3153,7 +3129,7 @@ define amdgpu_kernel void @atomic_min_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_min_i64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile min ptr addrspace(1) %out, i64 %in syncscope("workgroup") seq_cst
@@ -3220,7 +3196,7 @@ define amdgpu_kernel void @atomic_min_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_min_i64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -3289,7 +3265,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_min_i64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -3364,7 +3340,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_min_i64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -3417,7 +3393,7 @@ define amdgpu_kernel void @atomic_umin_i64_offset(ptr addrspace(1) %out, i64 %in
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_min_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -3485,7 +3461,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_min_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -3557,7 +3533,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64_offset(ptr addrspace(1) %out,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_min_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -3635,7 +3611,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(ptr addrspace(1) %o
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_min_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -3693,7 +3669,7 @@ define amdgpu_kernel void @atomic_umin_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_min_u64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile umin ptr addrspace(1) %out, i64 %in syncscope("workgroup") seq_cst
@@ -3760,7 +3736,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret(ptr addrspace(1) %out, ptr addrsp
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_min_u64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -3829,7 +3805,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64(ptr addrspace(1) %out, i64 %in
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_min_u64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -3904,7 +3880,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_min_u64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SE
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -3963,8 +3939,7 @@ define amdgpu_kernel void @atomic_or_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_or_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -4035,8 +4010,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_offset(ptr addrspace(1) %out, ptr a
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_or_b64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -4114,8 +4088,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64_offset(ptr addrspace(1) %out, i6
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_or_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -4196,8 +4169,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(ptr addrspace(1) %out
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_or_b64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -4261,8 +4233,7 @@ define amdgpu_kernel void @atomic_or_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_or_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile or ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
@@ -4332,8 +4303,7 @@ define amdgpu_kernel void @atomic_or_i64_ret(ptr addrspace(1) %out, ptr addrspac
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_or_b64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -4408,8 +4378,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_or_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -4487,8 +4456,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64(ptr addrspace(1) %out, ptr a
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_or_b64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -4547,8 +4515,7 @@ define amdgpu_kernel void @atomic_xchg_i64_offset(ptr addrspace(1) %out, i64 %in
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_swap_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -4603,8 +4570,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr addrspace(1) %out, double
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_swap_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr double, ptr addrspace(1) %out, i64 4
@@ -4659,8 +4625,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_swap_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr ptr, ptr addrspace(1) %out, i64 4
@@ -4731,8 +4696,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -4810,8 +4774,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(ptr addrspace(1) %out,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_swap_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -4892,8 +4855,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(ptr addrspace(1) %o
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -4957,8 +4919,7 @@ define amdgpu_kernel void @atomic_xchg_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_swap_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile xchg ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
@@ -5028,8 +4989,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret(ptr addrspace(1) %out, ptr addrsp
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -5104,8 +5064,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64(ptr addrspace(1) %out, i64 %in
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_swap_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -5183,8 +5142,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -5243,8 +5201,7 @@ define amdgpu_kernel void @atomic_xor_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_xor_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -5315,8 +5272,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -5394,8 +5350,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64_offset(ptr addrspace(1) %out, i
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_xor_b64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -5476,8 +5431,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(ptr addrspace(1) %ou
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -5541,8 +5495,7 @@ define amdgpu_kernel void @atomic_xor_i64(ptr addrspace(1) %out, i64 %in) {
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_xor_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%tmp0 = atomicrmw volatile xor ptr addrspace(1) %out, i64 %in syncscope("agent") seq_cst
@@ -5612,8 +5565,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret(ptr addrspace(1) %out, ptr addrspa
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -5688,8 +5640,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64(ptr addrspace(1) %out, i64 %in,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_xor_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -5767,8 +5718,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64(ptr addrspace(1) %out, ptr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -5843,8 +5793,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr addrspace(1) %out, i64
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: global_atomic_cmpswap_b64 v4, v[0:3], s[4:5] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -5917,8 +5866,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr addrspace(1) %out, i64
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: global_atomic_cmpswap_b64 v4, v[0:3], s[4:5] offset:72000
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 9000
@@ -5991,8 +5939,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr addrspace(1) %out,
; GFX12-NEXT: v_mov_b32_e32 v2, s6
; GFX12-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v4, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -6074,8 +6021,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr addrspace(1) %ou
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: global_atomic_cmpswap_b64 v4, v[0:3], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -6168,8 +6114,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr addrspace(1)
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v4, v[0:1], s[6:7]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -6246,8 +6191,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr addrspace(1) %out, i64 %in, i6
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: global_atomic_cmpswap_b64 v4, v[0:3], s[4:5]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%val = cmpxchg volatile ptr addrspace(1) %out, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
@@ -6319,8 +6263,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr addrspace(1) %out, ptr add
; GFX12-NEXT: v_mov_b32_e32 v2, s6
; GFX12-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v4, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -6399,8 +6342,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr addrspace(1) %out, i64
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: global_atomic_cmpswap_b64 v4, v[0:3], s[0:1]
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -6490,8 +6432,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr addrspace(1) %out,
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_cmpswap_b64 v[0:1], v4, v[0:3], s[0:1] th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v4, v[0:1], s[6:7]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -6557,8 +6498,7 @@ define amdgpu_kernel void @atomic_load_i64_offset(ptr addrspace(1) %in, ptr addr
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -6625,8 +6565,7 @@ define amdgpu_kernel void @atomic_load_i64_neg_offset(ptr addrspace(1) %in, ptr
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1] offset:-32 th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -6689,8 +6628,7 @@ define amdgpu_kernel void @atomic_load_i64(ptr addrspace(1) %in, ptr addrspace(1
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1] th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -6771,8 +6709,7 @@ define amdgpu_kernel void @atomic_load_i64_addr64_offset(ptr addrspace(1) %in, p
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -6853,8 +6790,7 @@ define amdgpu_kernel void @atomic_load_i64_addr64(ptr addrspace(1) %in, ptr addr
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1] th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -6936,8 +6872,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr addrspace(1) %in, p
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -7307,8 +7242,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -7379,8 +7313,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_inc_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -7458,8 +7391,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr addrspace(1) %out, i
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index
@@ -7515,8 +7447,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr addrspace(1) %out, i64 %in)
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: global_atomic_dec_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i64, ptr addrspace(1) %out, i64 4
@@ -7587,8 +7518,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr addrspace(1) %out, ptr
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: global_atomic_dec_u64 v[0:1], v2, v[0:1], s[0:1] offset:32 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_waitcnt vmcnt(0)
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -7666,8 +7596,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr addrspace(1) %out, i
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[4:5], s[0:1]
; GFX12-NEXT: global_atomic_dec_u64 v2, v[0:1], s[0:1] offset:32
; GFX12-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX12-NEXT: buffer_gl0_inv
-; GFX12-NEXT: buffer_gl1_inv
+; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
entry:
%ptr = getelementptr i64, ptr addrspace(1) %out, i64 %index