diff options
author | Pierre van Houtryve <pierre.vanhoutryve@amd.com> | 2024-02-13 09:07:51 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-13 09:07:51 +0100 |
commit | 87d771193490e6604f132752438bd8404c83498c (patch) | |
tree | f83d36eb2288d0b02296155c3451a02a84ad9064 | |
parent | a1efe56ace6099959ac97fcb09b9a7837b6d0255 (diff) | |
download | llvm-87d771193490e6604f132752438bd8404c83498c.zip llvm-87d771193490e6604f132752438bd8404c83498c.tar.gz llvm-87d771193490e6604f132752438bd8404c83498c.tar.bz2 |
[AMDGPU][SIMemoryLegalizer] Fix order of GL0/1_INV on GFX10/11 (#81450)
Fixes SWDEV-443292
17 files changed, 1574 insertions, 1571 deletions
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 2b4ca0a..8c12690 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -12291,8 +12291,8 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. before invalidating the caches. - 3. buffer_gl0_inv; - buffer_gl1_inv + 3. buffer_gl1_inv; + buffer_gl0_inv - Must happen before any following @@ -12321,8 +12321,8 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. before invalidating the caches. - 3. buffer_gl0_inv; - buffer_gl1_inv + 3. buffer_gl1_inv; + buffer_gl0_inv - Must happen before any following @@ -12428,8 +12428,8 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. invalidating the caches. - 3. buffer_gl0_inv; - buffer_gl1_inv + 3. buffer_gl1_inv; + buffer_gl0_inv - Must happen before any following @@ -12459,8 +12459,8 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. invalidating the caches. - 3. buffer_gl0_inv; - buffer_gl1_inv + 3. buffer_gl1_inv; + buffer_gl0_inv - Must happen before any following @@ -12655,8 +12655,8 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. the fence-paired-atomic. - 2. buffer_gl0_inv; - buffer_gl1_inv + 2. buffer_gl1_inv; + buffer_gl0_inv - Must happen before any following global/generic @@ -13369,8 +13369,8 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. invalidating the caches. - 4. buffer_gl0_inv; - buffer_gl1_inv + 4. buffer_gl1_inv; + buffer_gl0_inv - Must happen before any following @@ -13444,8 +13444,8 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. invalidating the caches. - 4. buffer_gl0_inv; - buffer_gl1_inv + 4. buffer_gl1_inv; + buffer_gl0_inv - Must happen before any following @@ -13672,8 +13672,8 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. requirements of release. - 2. buffer_gl0_inv; - buffer_gl1_inv + 2. buffer_gl1_inv; + buffer_gl0_inv - Must happen before any following diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 84b9330..f62e808 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -2030,8 +2030,11 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: - BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV)); + // The order of invalidates matter here. We must invalidate "outer in" + // so L1 -> L0 to avoid L0 pulling in stale data from L1 when it is + // invalidated. BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV)); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV)); Changed = true; break; case SIAtomicScope::WORKGROUP: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll index 25cee87..b04bc04 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll @@ -338,8 +338,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -350,8 +350,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -416,8 +416,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %ou ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -428,8 +428,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %ou ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -495,8 +495,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_system(ptr addrspace ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -507,8 +507,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_system(ptr addrspace ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -563,8 +563,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) #1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v1, v0, s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_dec_noret_i32: @@ -574,8 +574,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) #1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v1, v0, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4 ret void @@ -629,8 +629,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) % ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_dec_noret_i32_offset: @@ -640,8 +640,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) % ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4 @@ -696,8 +696,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_system(ptr addrspa ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_dec_noret_i32_offset_system: @@ -707,8 +707,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_system(ptr addrspa ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4 @@ -780,8 +780,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v1, v0, v1, s[2:3] offset:20 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -792,8 +792,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v1, v0, v1, s[2:3] offset:20 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -861,8 +861,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspa ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v0, v1, s[0:1] offset:20 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_dec_noret_i32_offset_addr64: @@ -872,8 +872,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspa ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, s[0:1] offset:20 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id @@ -937,8 +937,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 { ; GFX10-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: flat_store_dword v[0:1], v2 @@ -952,8 +952,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 { ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm @@ -1023,8 +1023,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: flat_store_dword v[0:1], v2 @@ -1038,8 +1038,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm @@ -1110,8 +1110,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr % ; GFX10-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: flat_store_dword v[0:1], v2 @@ -1125,8 +1125,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr % ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm @@ -1183,8 +1183,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 { ; GFX10-NEXT: flat_atomic_dec v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_dec_noret_i32: @@ -1196,8 +1196,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 { ; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4 ret void @@ -1256,8 +1256,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 { ; GFX10-NEXT: flat_atomic_dec v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_dec_noret_i32_offset: @@ -1269,8 +1269,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 { ; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr %ptr, i32 4 %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4 @@ -1330,8 +1330,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1 ; GFX10-NEXT: flat_atomic_dec v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_dec_noret_i32_offset_system: @@ -1343,8 +1343,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1 ; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr %ptr, i32 4 %result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4 @@ -1430,8 +1430,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr % ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_atomic_dec v3, v[0:1], v3 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -1450,8 +1450,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr % ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: flat_atomic_dec_u32 v3, v[0:1], v3 offset:20 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -1532,8 +1532,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1 ; GFX10-NEXT: flat_atomic_dec v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_dec_noret_i32_offset_addr64: @@ -1549,8 +1549,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1 ; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 offset:20 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, ptr %ptr, i32 %id @@ -1628,8 +1628,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #1 { ; GFX10-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -1644,8 +1644,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #1 { ; GFX11-NEXT: v_mov_b32_e32 v3, s3 ; GFX11-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3], v[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX11-NEXT: s_endpgm @@ -1729,8 +1729,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -1745,8 +1745,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1 ; GFX11-NEXT: v_mov_b32_e32 v3, s3 ; GFX11-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3], v[0:1] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX11-NEXT: s_endpgm @@ -1807,8 +1807,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) #1 { ; GFX10-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_dec_noret_i64: @@ -1821,8 +1821,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) #1 { ; GFX11-NEXT: flat_atomic_dec_u64 v[2:3], v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8 ret void @@ -1885,8 +1885,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 { ; GFX10-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_dec_noret_i64_offset: @@ -1899,8 +1899,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 { ; GFX11-NEXT: flat_atomic_dec_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr %ptr, i32 4 %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8 @@ -1964,8 +1964,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) #1 ; GFX10-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_dec_noret_i64_offset_system: @@ -1978,8 +1978,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) #1 ; GFX11-NEXT: flat_atomic_dec_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr %ptr, i32 4 %result = atomicrmw udec_wrap ptr %gep, i64 42 seq_cst, align 8 @@ -2075,8 +2075,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr % ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -2097,8 +2097,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr % ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: flat_atomic_dec_u64 v[0:1], v[0:1], v[2:3] offset:40 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -2183,8 +2183,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1 ; GFX10-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_dec_noret_i64_offset_addr64: @@ -2201,8 +2201,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1 ; GFX11-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3] offset:40 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, ptr %ptr, i32 %id @@ -2651,8 +2651,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -2664,8 +2664,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v2, v[0:1], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2734,8 +2734,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %ou ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -2747,8 +2747,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %ou ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2818,8 +2818,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_system(ptr addrspace ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -2831,8 +2831,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_system(ptr addrspace ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2891,8 +2891,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) #1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_dec_noret_i64: @@ -2903,8 +2903,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) #1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8 ret void @@ -2962,8 +2962,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) % ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_dec_noret_i64_offset: @@ -2974,8 +2974,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) % ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8 @@ -3034,8 +3034,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_system(ptr addrspa ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_dec_noret_i64_offset_system: @@ -3046,8 +3046,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_system(ptr addrspa ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 %result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8 @@ -3123,8 +3123,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -3136,8 +3136,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -3209,8 +3209,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspa ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v0, v[1:2], s[0:1] offset:40 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_dec_noret_i64_offset_addr64: @@ -3221,8 +3221,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspa ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v0, v[1:2], s[0:1] offset:40 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll index 5e01337..f6a997f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll @@ -338,8 +338,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v0, v1, v0, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -350,8 +350,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, v0, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -416,8 +416,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %ou ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v0, v1, v0, s[2:3] offset:16 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -428,8 +428,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %ou ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, v0, s[2:3] offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -495,8 +495,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_sistem(ptr addrspace ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v0, v1, v0, s[2:3] offset:16 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -507,8 +507,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_sistem(ptr addrspace ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, v0, s[2:3] offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -563,8 +563,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32(ptr addrspace(1) %ptr) #1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v1, v0, s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_inc_noret_i32: @@ -574,8 +574,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32(ptr addrspace(1) %ptr) #1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4 ret void @@ -629,8 +629,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) % ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_inc_noret_i32_offset: @@ -640,8 +640,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) % ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4 @@ -696,8 +696,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_system(ptr addrspa ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_inc_noret_i32_offset_system: @@ -707,8 +707,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_system(ptr addrspa ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i32 42 seq_cst, align 4 @@ -780,8 +780,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v1, v0, v1, s[2:3] offset:20 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -792,8 +792,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, v1, s[2:3] offset:20 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -861,8 +861,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspa ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v0, v1, s[0:1] offset:20 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_inc_noret_i32_offset_addr64: @@ -872,8 +872,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspa ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, s[0:1] offset:20 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id @@ -1322,8 +1322,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -1335,8 +1335,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v2, v[0:1], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1405,8 +1405,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %ou ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -1418,8 +1418,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %ou ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1489,8 +1489,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_system(ptr addrspace ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -1502,8 +1502,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_system(ptr addrspace ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1562,8 +1562,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64(ptr addrspace(1) %ptr) #1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_inc_noret_i64: @@ -1574,8 +1574,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64(ptr addrspace(1) %ptr) #1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %result = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8 ret void @@ -1633,8 +1633,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) % ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_inc_noret_i64_offset: @@ -1645,8 +1645,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) % ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8 @@ -1705,8 +1705,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_system(ptr addrspa ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_inc_noret_i64_offset_system: @@ -1717,8 +1717,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_system(ptr addrspa ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 %result = atomicrmw uinc_wrap ptr addrspace(1) %gep, i64 42 seq_cst, align 8 @@ -1794,8 +1794,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -1807,8 +1807,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1880,8 +1880,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspa ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v0, v[1:2], s[0:1] offset:40 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_atomic_inc_noret_i64_offset_addr64: @@ -1892,8 +1892,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspa ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v0, v[1:2], s[0:1] offset:40 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id @@ -1957,8 +1957,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #1 { ; GFX10-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: flat_store_dword v[0:1], v2 @@ -1972,8 +1972,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #1 { ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm @@ -2043,8 +2043,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #1 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: flat_store_dword v[0:1], v2 @@ -2058,8 +2058,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #1 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm @@ -2130,8 +2130,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr % ; GFX10-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: flat_store_dword v[0:1], v2 @@ -2145,8 +2145,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr % ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm @@ -2203,8 +2203,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 { ; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_inc_noret_i32: @@ -2216,8 +2216,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 { ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %result = atomicrmw uinc_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4 ret void @@ -2276,8 +2276,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 { ; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_inc_noret_i32_offset: @@ -2289,8 +2289,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 { ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr %ptr, i32 4 %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4 @@ -2350,8 +2350,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1 ; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_inc_noret_i32_offset_system: @@ -2363,8 +2363,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1 ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr %ptr, i32 4 %result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4 @@ -2450,8 +2450,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr % ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_atomic_inc v3, v[0:1], v3 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -2470,8 +2470,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr % ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: flat_atomic_inc_u32 v3, v[0:1], v3 offset:20 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -2552,8 +2552,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1 ; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_inc_noret_i32_offset_addr64: @@ -2569,8 +2569,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1 ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:20 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, ptr %ptr, i32 %id @@ -2744,8 +2744,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #1 { ; GFX10-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -2760,8 +2760,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #1 { ; GFX11-NEXT: v_mov_b32_e32 v3, s3 ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX11-NEXT: s_endpgm @@ -2845,8 +2845,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #1 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -2861,8 +2861,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #1 ; GFX11-NEXT: v_mov_b32_e32 v3, s3 ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX11-NEXT: s_endpgm @@ -2947,8 +2947,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr % ; GFX10-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -2963,8 +2963,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr % ; GFX11-NEXT: v_mov_b32_e32 v3, s3 ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX11-NEXT: s_endpgm @@ -3025,8 +3025,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64(ptr %ptr) #1 { ; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_inc_noret_i64: @@ -3039,8 +3039,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64(ptr %ptr) #1 { ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %result = atomicrmw uinc_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8 ret void @@ -3103,8 +3103,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) #1 { ; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_inc_noret_i64_offset: @@ -3117,8 +3117,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) #1 { ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr %ptr, i32 4 %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8 @@ -3182,8 +3182,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1 ; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_inc_noret_i64_offset_system: @@ -3196,8 +3196,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1 ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr %ptr, i32 4 %result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8 @@ -3293,8 +3293,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr % ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -3315,8 +3315,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr % ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[0:1], v[2:3] offset:40 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -3401,8 +3401,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1 ; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: flat_atomic_inc_noret_i64_offset_addr64: @@ -3419,8 +3419,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1 ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3] offset:40 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, ptr %ptr, i32 %id diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll index 03cd833..2727fde 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -27,32 +27,32 @@ define amdgpu_kernel void @system_one_as_acquire() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_one_as_acquire ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_one_as_acquire ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_one_as_acquire ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("one-as") acquire @@ -115,32 +115,32 @@ define amdgpu_kernel void @system_one_as_acq_rel() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_one_as_acq_rel ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_one_as_acq_rel ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_one_as_acq_rel ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("one-as") acq_rel @@ -164,32 +164,32 @@ define amdgpu_kernel void @system_one_as_seq_cst() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_one_as_seq_cst ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_one_as_seq_cst ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_one_as_seq_cst ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("one-as") seq_cst @@ -329,32 +329,32 @@ define amdgpu_kernel void @agent_one_as_acquire() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_one_as_acquire ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_one_as_acquire ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_one_as_acquire ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("agent-one-as") acquire @@ -417,32 +417,32 @@ define amdgpu_kernel void @agent_one_as_acq_rel() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_one_as_acq_rel ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_one_as_acq_rel ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_one_as_acq_rel ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("agent-one-as") acq_rel @@ -466,32 +466,32 @@ define amdgpu_kernel void @agent_one_as_seq_cst() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_one_as_seq_cst ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_one_as_seq_cst ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_one_as_seq_cst ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("agent-one-as") seq_cst @@ -769,32 +769,32 @@ define amdgpu_kernel void @system_acquire() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_acquire ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_acquire ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_acquire ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence acquire @@ -857,32 +857,32 @@ define amdgpu_kernel void @system_acq_rel() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_acq_rel ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_acq_rel ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_acq_rel ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence acq_rel @@ -906,32 +906,32 @@ define amdgpu_kernel void @system_seq_cst() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_seq_cst ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_seq_cst ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_seq_cst ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence seq_cst @@ -1071,32 +1071,32 @@ define amdgpu_kernel void @agent_acquire() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_acquire ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_acquire ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_acquire ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("agent") acquire @@ -1159,32 +1159,32 @@ define amdgpu_kernel void @agent_acq_rel() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_acq_rel ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_acq_rel ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_acq_rel ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("agent") acq_rel @@ -1208,32 +1208,32 @@ define amdgpu_kernel void @agent_seq_cst() { ; GFX10WGP: bb.0.entry: ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_seq_cst ; GFX10CU: bb.0.entry: ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX10CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_seq_cst ; GFX11WGP: bb.0.entry: ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_seq_cst ; GFX11CU: bb.0.entry: ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 - ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec + ; GFX11CU-NEXT: BUFFER_GL0_INV implicit $exec ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("agent") seq_cst diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll index b5cccb6..8ee0ee3 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll @@ -99,8 +99,8 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1064-NEXT: s_mov_b32 s9, s3 ; GFX1064-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB0_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] @@ -132,8 +132,8 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1032-NEXT: s_mov_b32 s9, s3 ; GFX1032-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB0_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 @@ -167,8 +167,8 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1164-NEXT: s_mov_b32 s9, s3 ; GFX1164-NEXT: buffer_atomic_add_u32 v1, off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB0_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -203,8 +203,8 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1132-NEXT: s_mov_b32 s9, s3 ; GFX1132-NEXT: buffer_atomic_add_u32 v1, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB0_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -420,8 +420,8 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1064-NEXT: s_mov_b32 s13, s7 ; GFX1064-NEXT: buffer_atomic_add v1, off, s[12:15], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB1_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1] @@ -455,8 +455,8 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1032-NEXT: s_mov_b32 s9, s7 ; GFX1032-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB1_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0 @@ -492,8 +492,8 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1164-NEXT: s_mov_b32 s13, s7 ; GFX1164-NEXT: buffer_atomic_add_u32 v1, off, s[12:15], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB1_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164-NEXT: v_readfirstlane_b32 s0, v1 @@ -530,8 +530,8 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1132-NEXT: s_mov_b32 s9, s7 ; GFX1132-NEXT: buffer_atomic_add_u32 v1, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB1_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132-NEXT: v_readfirstlane_b32 s2, v1 @@ -767,8 +767,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1064-NEXT: s_mov_b32 s9, s3 ; GFX1064-NEXT: buffer_atomic_add v0, off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB2_4: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] @@ -812,8 +812,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1032-NEXT: s_mov_b32 s9, s3 ; GFX1032-NEXT: buffer_atomic_add v0, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB2_4: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s5 @@ -861,8 +861,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164-NEXT: s_mov_b32 s9, s3 ; GFX1164-NEXT: buffer_atomic_add_u32 v0, off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB2_4: ; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -910,8 +910,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1132-NEXT: s_mov_b32 s9, s3 ; GFX1132-NEXT: buffer_atomic_add_u32 v0, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB2_4: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -1128,8 +1128,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1064-NEXT: s_mov_b32 s9, s3 ; GFX1064-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB3_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] @@ -1163,8 +1163,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1032-NEXT: s_mov_b32 s9, s3 ; GFX1032-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB3_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 @@ -1200,8 +1200,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1164-NEXT: s_mov_b32 s9, s3 ; GFX1164-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB3_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -1237,8 +1237,8 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1132-NEXT: s_mov_b32 s9, s3 ; GFX1132-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB3_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -1488,8 +1488,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1064-NEXT: s_mov_b32 s9, s7 ; GFX1064-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB4_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1] @@ -1529,8 +1529,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1032-NEXT: s_mov_b32 s9, s7 ; GFX1032-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB4_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0 @@ -1572,8 +1572,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1164-NEXT: s_mov_b32 s9, s7 ; GFX1164-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB4_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 @@ -1618,8 +1618,8 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1132-NEXT: s_mov_b32 s9, s7 ; GFX1132-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB4_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 @@ -1777,8 +1777,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_mov_b32 s4, s0 ; GFX10-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_mov_b32 s5, s1 ; GFX10-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX10-NEXT: s_endpgm @@ -1797,8 +1797,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_mov_b32 s4, s0 ; GFX11-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_mov_b32 s5, s1 ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0 ; GFX11-NEXT: s_nop 0 @@ -1954,8 +1954,8 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1064-NEXT: s_mov_b32 s9, s3 ; GFX1064-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB6_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] @@ -1988,8 +1988,8 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1032-NEXT: s_mov_b32 s9, s3 ; GFX1032-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB6_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 @@ -2024,8 +2024,8 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1164-NEXT: s_mov_b32 s9, s3 ; GFX1164-NEXT: buffer_atomic_sub_u32 v1, off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB6_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -2061,8 +2061,8 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1132-NEXT: s_mov_b32 s9, s3 ; GFX1132-NEXT: buffer_atomic_sub_u32 v1, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB6_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -2281,8 +2281,8 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1064-NEXT: s_mov_b32 s13, s7 ; GFX1064-NEXT: buffer_atomic_sub v1, off, s[12:15], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB7_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1] @@ -2317,8 +2317,8 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1032-NEXT: s_mov_b32 s9, s7 ; GFX1032-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB7_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0 @@ -2355,8 +2355,8 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1164-NEXT: s_mov_b32 s13, s7 ; GFX1164-NEXT: buffer_atomic_sub_u32 v1, off, s[12:15], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB7_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -2394,8 +2394,8 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1132-NEXT: s_mov_b32 s9, s7 ; GFX1132-NEXT: buffer_atomic_sub_u32 v1, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB7_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -2634,8 +2634,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1064-NEXT: s_mov_b32 s9, s3 ; GFX1064-NEXT: buffer_atomic_sub v0, off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB8_4: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] @@ -2679,8 +2679,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1032-NEXT: s_mov_b32 s9, s3 ; GFX1032-NEXT: buffer_atomic_sub v0, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB8_4: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s5 @@ -2728,8 +2728,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164-NEXT: s_mov_b32 s9, s3 ; GFX1164-NEXT: buffer_atomic_sub_u32 v0, off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB8_4: ; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -2777,8 +2777,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1132-NEXT: s_mov_b32 s9, s3 ; GFX1132-NEXT: buffer_atomic_sub_u32 v0, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB8_4: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -3034,8 +3034,8 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1064-NEXT: s_mov_b32 s9, s3 ; GFX1064-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB9_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] @@ -3072,8 +3072,8 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1032-NEXT: s_mov_b32 s9, s3 ; GFX1032-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB9_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 @@ -3112,8 +3112,8 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1164-NEXT: s_mov_b32 s9, s3 ; GFX1164-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB9_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -3152,8 +3152,8 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1132-NEXT: s_mov_b32 s9, s3 ; GFX1132-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB9_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -3415,8 +3415,8 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1064-NEXT: s_mov_b32 s9, s7 ; GFX1064-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv +; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: .LBB10_2: ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1] @@ -3459,8 +3459,8 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1032-NEXT: s_mov_b32 s9, s7 ; GFX1032-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv +; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: .LBB10_2: ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0 @@ -3505,8 +3505,8 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1164-NEXT: s_mov_b32 s9, s7 ; GFX1164-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) -; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: buffer_gl1_inv +; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: .LBB10_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -3553,8 +3553,8 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1132-NEXT: s_mov_b32 s9, s7 ; GFX1132-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) -; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: buffer_gl1_inv +; GFX1132-NEXT: buffer_gl0_inv ; GFX1132-NEXT: .LBB10_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -3722,8 +3722,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX10-NEXT: s_mov_b32 s4, s0 ; GFX10-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_mov_b32 s5, s1 ; GFX10-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX10-NEXT: s_endpgm @@ -3742,8 +3742,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX11-NEXT: s_mov_b32 s4, s0 ; GFX11-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_mov_b32 s5, s1 ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0 ; GFX11-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll index e64f8c2..19a1d2d9 100644 --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -75,8 +75,8 @@ define float @syncscope_system(ptr %addr, float %val) #0 { ; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] glc ; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1100-NEXT: buffer_gl0_inv ; GFX1100-NEXT: buffer_gl1_inv +; GFX1100-NEXT: buffer_gl0_inv ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4 ; GFX1100-NEXT: s_or_b32 s0, vcc_lo, s0 ; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll index 135285cc..490167e 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll @@ -156,8 +156,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(ptr addrspace(1) %ptr) #0 ; GFX10-NEXT: v_add_f32_e32 v4, v5, v2 ; GFX10-NEXT: global_atomic_cmpswap v1, v3, v[4:5], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v5 ; GFX10-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s3 @@ -196,8 +196,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(ptr addrspace(1) %ptr) #0 ; GFX11-NEXT: v_add_f32_e32 v4, v5, v2 ; GFX11-NEXT: global_atomic_cmpswap_b32 v1, v3, v[4:5], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v5 ; GFX11-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s3 @@ -357,8 +357,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(ptr addrspace(1) %ptr ; GFX10-NEXT: v_add_f32_e32 v4, v5, v2 ; GFX10-NEXT: global_atomic_cmpswap v1, v3, v[4:5], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v5 ; GFX10-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s3 @@ -390,8 +390,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(ptr addrspace(1) %ptr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_add_f32 v1, v2, v1, s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: .LBB1_2: ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 @@ -506,8 +506,8 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32(ptr addrspace(1) %ptr) # ; GFX10-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX10-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2 @@ -531,8 +531,8 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32(ptr addrspace(1) %ptr) # ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_add_f32 v1, v0, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: .LBB2_2: ; GFX11-NEXT: s_endpgm %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("agent") seq_cst @@ -638,8 +638,8 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(ptr addrspace(1) %p ; GFX10-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX10-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2 @@ -663,8 +663,8 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(ptr addrspace(1) %p ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_add_f32 v1, v0, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: .LBB3_2: ; GFX11-NEXT: s_endpgm %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("agent") seq_cst @@ -806,8 +806,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(ptr addrspace(1) %pt ; GFX10-NEXT: v_add_f32_e32 v4, v5, v2 ; GFX10-NEXT: global_atomic_cmpswap v1, v3, v[4:5], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v5 ; GFX10-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s3 @@ -838,8 +838,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(ptr addrspace(1) %pt ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_add_f32 v1, v2, v1, s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: .LBB4_2: ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 @@ -1006,8 +1006,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(ptr addrspace(1) %p ; GFX10-NEXT: v_add_f32_e32 v4, v5, v2 ; GFX10-NEXT: global_atomic_cmpswap v1, v3, v[4:5], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v5 ; GFX10-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s3 @@ -1046,8 +1046,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(ptr addrspace(1) %p ; GFX11-NEXT: v_add_f32_e32 v4, v5, v2 ; GFX11-NEXT: global_atomic_cmpswap_b32 v1, v3, v[4:5], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v5 ; GFX11-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s3 @@ -1325,8 +1325,8 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(ptr addrspace(1) %p ; GFX10-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX10-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2 @@ -1357,8 +1357,8 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(ptr addrspace(1) %p ; GFX11-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v3, v[0:1], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v0 ; GFX11-NEXT: s_or_b32 s2, vcc_lo, s2 @@ -1631,8 +1631,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %p ; GFX10-NEXT: v_and_or_b32 v1, v2, s4, v1 ; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 ; GFX10-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s3 @@ -1669,8 +1669,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %p ; GFX11-NEXT: v_and_or_b32 v1, v2, s4, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 ; GFX11-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s3 @@ -1819,8 +1819,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) % ; GFX10-NEXT: v_and_or_b32 v1, v2, s4, v1 ; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 ; GFX10-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s3 @@ -1857,8 +1857,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) % ; GFX11-NEXT: v_and_or_b32 v1, v2, s4, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 ; GFX11-NEXT: s_or_b32 s3, vcc_lo, s3 ; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s3 diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll index a847538..6b4a638 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll @@ -48,8 +48,8 @@ define amdgpu_ps float @global_max_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX10-NEXT: v_max_i32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -75,8 +75,8 @@ define amdgpu_ps float @global_max_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX11-NEXT: v_max_i32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -131,8 +131,8 @@ define amdgpu_ps float @global_max_saddr_i32_rtn_neg128(ptr addrspace(1) inreg % ; GFX10-NEXT: v_max_i32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -158,8 +158,8 @@ define amdgpu_ps float @global_max_saddr_i32_rtn_neg128(ptr addrspace(1) inreg % ; GFX11-NEXT: v_max_i32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -211,8 +211,8 @@ define amdgpu_ps void @global_max_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, ; GFX10-NEXT: v_max_i32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -235,8 +235,8 @@ define amdgpu_ps void @global_max_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, ; GFX11-NEXT: v_max_i32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -286,8 +286,8 @@ define amdgpu_ps void @global_max_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_max_i32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -310,8 +310,8 @@ define amdgpu_ps void @global_max_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_max_i32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -372,8 +372,8 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn(ptr addrspace(1) inreg %s ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -403,8 +403,8 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn(ptr addrspace(1) inreg %s ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -467,8 +467,8 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn_neg128(ptr addrspace(1) i ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -498,8 +498,8 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn_neg128(ptr addrspace(1) i ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -558,8 +558,8 @@ define amdgpu_ps void @global_max_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: v_mov_b32_e32 v5, v3 @@ -585,8 +585,8 @@ define amdgpu_ps void @global_max_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX11-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-NEXT: v_mov_b32_e32 v5, v3 @@ -642,8 +642,8 @@ define amdgpu_ps void @global_max_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: v_mov_b32_e32 v5, v3 @@ -669,8 +669,8 @@ define amdgpu_ps void @global_max_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX11-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-NEXT: v_mov_b32_e32 v5, v3 @@ -730,8 +730,8 @@ define amdgpu_ps float @global_min_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX10-NEXT: v_min_i32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -757,8 +757,8 @@ define amdgpu_ps float @global_min_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX11-NEXT: v_min_i32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -813,8 +813,8 @@ define amdgpu_ps float @global_min_saddr_i32_rtn_neg128(ptr addrspace(1) inreg % ; GFX10-NEXT: v_min_i32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -840,8 +840,8 @@ define amdgpu_ps float @global_min_saddr_i32_rtn_neg128(ptr addrspace(1) inreg % ; GFX11-NEXT: v_min_i32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -893,8 +893,8 @@ define amdgpu_ps void @global_min_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, ; GFX10-NEXT: v_min_i32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -917,8 +917,8 @@ define amdgpu_ps void @global_min_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, ; GFX11-NEXT: v_min_i32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -968,8 +968,8 @@ define amdgpu_ps void @global_min_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_min_i32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -992,8 +992,8 @@ define amdgpu_ps void @global_min_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_min_i32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -1054,8 +1054,8 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn(ptr addrspace(1) inreg %s ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -1085,8 +1085,8 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn(ptr addrspace(1) inreg %s ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1149,8 +1149,8 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn_neg128(ptr addrspace(1) i ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -1180,8 +1180,8 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn_neg128(ptr addrspace(1) i ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1240,8 +1240,8 @@ define amdgpu_ps void @global_min_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: v_mov_b32_e32 v5, v3 @@ -1267,8 +1267,8 @@ define amdgpu_ps void @global_min_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX11-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-NEXT: v_mov_b32_e32 v5, v3 @@ -1324,8 +1324,8 @@ define amdgpu_ps void @global_min_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: v_mov_b32_e32 v5, v3 @@ -1351,8 +1351,8 @@ define amdgpu_ps void @global_min_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX11-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-NEXT: v_mov_b32_e32 v5, v3 @@ -1412,8 +1412,8 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX10-NEXT: v_max_u32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -1439,8 +1439,8 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX11-NEXT: v_max_u32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1495,8 +1495,8 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_max_u32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -1522,8 +1522,8 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_max_u32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1575,8 +1575,8 @@ define amdgpu_ps void @global_umax_saddr_i32_nortn(ptr addrspace(1) inreg %sbase ; GFX10-NEXT: v_max_u32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -1599,8 +1599,8 @@ define amdgpu_ps void @global_umax_saddr_i32_nortn(ptr addrspace(1) inreg %sbase ; GFX11-NEXT: v_max_u32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -1650,8 +1650,8 @@ define amdgpu_ps void @global_umax_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_max_u32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -1674,8 +1674,8 @@ define amdgpu_ps void @global_umax_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_max_u32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -1736,8 +1736,8 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn(ptr addrspace(1) inreg % ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -1767,8 +1767,8 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn(ptr addrspace(1) inreg % ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1831,8 +1831,8 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn_neg128(ptr addrspace(1) ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -1862,8 +1862,8 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn_neg128(ptr addrspace(1) ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1922,8 +1922,8 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn(ptr addrspace(1) inreg %sbase ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: v_mov_b32_e32 v5, v3 @@ -1949,8 +1949,8 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn(ptr addrspace(1) inreg %sbase ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX11-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-NEXT: v_mov_b32_e32 v5, v3 @@ -2006,8 +2006,8 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: v_mov_b32_e32 v5, v3 @@ -2033,8 +2033,8 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX11-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-NEXT: v_mov_b32_e32 v5, v3 @@ -2094,8 +2094,8 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX10-NEXT: v_min_u32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -2121,8 +2121,8 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX11-NEXT: v_min_u32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2177,8 +2177,8 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_min_u32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -2204,8 +2204,8 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_min_u32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2257,8 +2257,8 @@ define amdgpu_ps void @global_umin_saddr_i32_nortn(ptr addrspace(1) inreg %sbase ; GFX10-NEXT: v_min_u32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2281,8 +2281,8 @@ define amdgpu_ps void @global_umin_saddr_i32_nortn(ptr addrspace(1) inreg %sbase ; GFX11-NEXT: v_min_u32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2332,8 +2332,8 @@ define amdgpu_ps void @global_umin_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_min_u32_e32 v4, v5, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2356,8 +2356,8 @@ define amdgpu_ps void @global_umin_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_min_u32_e32 v4, v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2418,8 +2418,8 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn(ptr addrspace(1) inreg % ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -2449,8 +2449,8 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn(ptr addrspace(1) inreg % ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2513,8 +2513,8 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn_neg128(ptr addrspace(1) ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX10-NEXT: s_andn2_b64 exec, exec, s[0:1] @@ -2544,8 +2544,8 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn_neg128(ptr addrspace(1) ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10] ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2604,8 +2604,8 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn(ptr addrspace(1) inreg %sbase ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: v_mov_b32_e32 v5, v3 @@ -2631,8 +2631,8 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn(ptr addrspace(1) inreg %sbase ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX11-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-NEXT: v_mov_b32_e32 v5, v3 @@ -2688,8 +2688,8 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: v_mov_b32_e32 v5, v3 @@ -2715,8 +2715,8 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] ; GFX11-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-NEXT: v_mov_b32_e32 v5, v3 diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll index 94d21be..a3c8bb1 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll @@ -17,16 +17,16 @@ define amdgpu_ps void @global_xchg_saddr_i32_nortn(ptr addrspace(1) inreg %sbase ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xchg_saddr_i32_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -47,16 +47,16 @@ define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_2047(ptr addrspace(1) ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] offset:2047 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xchg_saddr_i32_nortn_offset_2047: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:2047 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -78,16 +78,16 @@ define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_neg2048(ptr addrspace( ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] offset:-2048 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:-2048 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -108,16 +108,16 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xchg_saddr_i32_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -142,16 +142,16 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_2048(ptr addrspace(1) inreg %s ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc, 0, v3, vcc ; GFX10-NEXT: global_atomic_swap v0, v[2:3], v1, off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xchg_saddr_i32_rtn_2048: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:2048 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -173,16 +173,16 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_neg2048(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:-2048 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xchg_saddr_i32_rtn_neg2048: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:-2048 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -222,8 +222,8 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn: @@ -235,8 +235,8 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds %zext.offset = zext i32 %voffset to i64 @@ -270,8 +270,8 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset: @@ -283,8 +283,8 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] offset:42 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds %zext.offset = zext i32 %voffset to i64 @@ -319,8 +319,8 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 ; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn: @@ -332,8 +332,8 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds %zext.offset = zext i32 %voffset to i64 @@ -366,8 +366,8 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i3 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 ; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset: @@ -379,8 +379,8 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i3 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] offset:42 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %sbase = load ptr addrspace(1), ptr addrspace(3) @ptr.in.lds %zext.offset = zext i32 %voffset to i64 @@ -410,16 +410,16 @@ define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn(ptr addrspace(1) inreg % ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xchg_saddr_i64_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -440,16 +440,16 @@ define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn_neg128(ptr addrspace(1) ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xchg_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -471,16 +471,16 @@ define amdgpu_ps void @global_xchg_saddr_i64_nortn(ptr addrspace(1) inreg %sbase ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xchg_saddr_i64_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -500,16 +500,16 @@ define amdgpu_ps void @global_xchg_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xchg_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -534,16 +534,16 @@ define amdgpu_ps float @global_add_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_add v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_add_saddr_i32_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -564,16 +564,16 @@ define amdgpu_ps float @global_add_saddr_i32_rtn_neg128(ptr addrspace(1) inreg % ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_add v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_add_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -595,16 +595,16 @@ define amdgpu_ps void @global_add_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_add v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_add_saddr_i32_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_add_u32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -624,16 +624,16 @@ define amdgpu_ps void @global_add_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_add v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_add_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_add_u32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -654,16 +654,16 @@ define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn(ptr addrspace(1) inreg %s ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_add_saddr_i64_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -684,16 +684,16 @@ define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn_neg128(ptr addrspace(1) i ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_add_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -715,16 +715,16 @@ define amdgpu_ps void @global_add_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_add_saddr_i64_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -744,16 +744,16 @@ define amdgpu_ps void @global_add_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_add_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -778,16 +778,16 @@ define amdgpu_ps float @global_sub_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_sub v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_sub_saddr_i32_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -808,16 +808,16 @@ define amdgpu_ps float @global_sub_saddr_i32_rtn_neg128(ptr addrspace(1) inreg % ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_sub v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_sub_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -839,16 +839,16 @@ define amdgpu_ps void @global_sub_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_sub v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_sub_saddr_i32_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_sub_u32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -868,16 +868,16 @@ define amdgpu_ps void @global_sub_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_sub v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_sub_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_sub_u32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -898,16 +898,16 @@ define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn(ptr addrspace(1) inreg %s ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_sub_saddr_i64_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -928,16 +928,16 @@ define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn_neg128(ptr addrspace(1) i ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_sub_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -959,16 +959,16 @@ define amdgpu_ps void @global_sub_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_sub_saddr_i64_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -988,16 +988,16 @@ define amdgpu_ps void @global_sub_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_sub_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1022,16 +1022,16 @@ define amdgpu_ps float @global_and_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_and v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_and_saddr_i32_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1052,16 +1052,16 @@ define amdgpu_ps float @global_and_saddr_i32_rtn_neg128(ptr addrspace(1) inreg % ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_and v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_and_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1083,16 +1083,16 @@ define amdgpu_ps void @global_and_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_and v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_and_saddr_i32_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_and_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1112,16 +1112,16 @@ define amdgpu_ps void @global_and_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_and v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_and_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_and_b32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1142,16 +1142,16 @@ define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn(ptr addrspace(1) inreg %s ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_and_saddr_i64_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1172,16 +1172,16 @@ define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn_neg128(ptr addrspace(1) i ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_and_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1203,16 +1203,16 @@ define amdgpu_ps void @global_and_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_and_saddr_i64_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1232,16 +1232,16 @@ define amdgpu_ps void @global_and_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_and_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1266,16 +1266,16 @@ define amdgpu_ps float @global_or_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_or v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_or_saddr_i32_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1296,16 +1296,16 @@ define amdgpu_ps float @global_or_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %s ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_or v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_or_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1327,16 +1327,16 @@ define amdgpu_ps void @global_or_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_or v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_or_saddr_i32_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_or_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1356,16 +1356,16 @@ define amdgpu_ps void @global_or_saddr_i32_nortn_neg128(ptr addrspace(1) inreg % ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_or v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_or_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_or_b32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1386,16 +1386,16 @@ define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn(ptr addrspace(1) inreg %sb ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_or_saddr_i64_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1416,16 +1416,16 @@ define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn_neg128(ptr addrspace(1) in ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_or_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1447,16 +1447,16 @@ define amdgpu_ps void @global_or_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_or_saddr_i64_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1476,16 +1476,16 @@ define amdgpu_ps void @global_or_saddr_i64_nortn_neg128(ptr addrspace(1) inreg % ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_or_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1510,16 +1510,16 @@ define amdgpu_ps float @global_xor_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_xor v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xor_saddr_i32_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1540,16 +1540,16 @@ define amdgpu_ps float @global_xor_saddr_i32_rtn_neg128(ptr addrspace(1) inreg % ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_xor v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xor_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1571,16 +1571,16 @@ define amdgpu_ps void @global_xor_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_xor v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xor_saddr_i32_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_xor_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1600,16 +1600,16 @@ define amdgpu_ps void @global_xor_saddr_i32_nortn_neg128(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_xor v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xor_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_xor_b32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1630,16 +1630,16 @@ define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn(ptr addrspace(1) inreg %s ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xor_saddr_i64_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1660,16 +1660,16 @@ define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn_neg128(ptr addrspace(1) i ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_xor_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1691,16 +1691,16 @@ define amdgpu_ps void @global_xor_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xor_saddr_i64_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -1720,16 +1720,16 @@ define amdgpu_ps void @global_xor_saddr_i64_nortn_neg128(ptr addrspace(1) inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_xor_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -2620,8 +2620,8 @@ define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn(ptr addrspace(1) inreg %sba ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_cmpxchg_saddr_i32_rtn: @@ -2629,8 +2629,8 @@ define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn(ptr addrspace(1) inreg %sba ; GFX11-NEXT: v_mov_b32_e32 v3, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -2654,8 +2654,8 @@ define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn_neg128(ptr addrspace(1) inr ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_cmpxchg_saddr_i32_rtn_neg128: @@ -2663,8 +2663,8 @@ define amdgpu_ps float @global_cmpxchg_saddr_i32_rtn_neg128(ptr addrspace(1) inr ; GFX11-NEXT: v_mov_b32_e32 v3, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -2689,8 +2689,8 @@ define amdgpu_ps void @global_cmpxchg_saddr_i32_nortn(ptr addrspace(1) inreg %sb ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_cmpxchg_saddr_i32_nortn: @@ -2698,8 +2698,8 @@ define amdgpu_ps void @global_cmpxchg_saddr_i32_nortn(ptr addrspace(1) inreg %sb ; GFX11-NEXT: v_mov_b32_e32 v3, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -2721,8 +2721,8 @@ define amdgpu_ps void @global_cmpxchg_saddr_i32_nortn_neg128(ptr addrspace(1) in ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_cmpxchg_saddr_i32_nortn_neg128: @@ -2730,8 +2730,8 @@ define amdgpu_ps void @global_cmpxchg_saddr_i32_nortn_neg128(ptr addrspace(1) in ; GFX11-NEXT: v_mov_b32_e32 v3, v1 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -2756,8 +2756,8 @@ define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn(ptr addrspace(1) inre ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_cmpxchg_saddr_i64_rtn: @@ -2766,8 +2766,8 @@ define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn(ptr addrspace(1) inre ; GFX11-NEXT: v_mov_b32_e32 v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -2793,8 +2793,8 @@ define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn_neg128(ptr addrspace( ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: global_cmpxchg_saddr_i64_rtn_neg128: @@ -2803,8 +2803,8 @@ define amdgpu_ps <2 x float> @global_cmpxchg_saddr_i64_rtn_neg128(ptr addrspace( ; GFX11-NEXT: v_mov_b32_e32 v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -2831,8 +2831,8 @@ define amdgpu_ps void @global_cmpxchg_saddr_i64_nortn(ptr addrspace(1) inreg %sb ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_cmpxchg_saddr_i64_nortn: @@ -2841,8 +2841,8 @@ define amdgpu_ps void @global_cmpxchg_saddr_i64_nortn(ptr addrspace(1) inreg %sb ; GFX11-NEXT: v_mov_b32_e32 v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset @@ -2866,8 +2866,8 @@ define amdgpu_ps void @global_cmpxchg_saddr_i64_nortn_neg128(ptr addrspace(1) in ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: global_cmpxchg_saddr_i64_nortn_neg128: @@ -2876,8 +2876,8 @@ define amdgpu_ps void @global_cmpxchg_saddr_i64_nortn_neg128(ptr addrspace(1) in ; GFX11-NEXT: v_mov_b32_e32 v5, v1 ; GFX11-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll index 6d99485..d9cbbc1 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll @@ -3575,16 +3575,16 @@ define amdgpu_ps float @atomic_global_load_saddr_i32(ptr addrspace(1) inreg %sba ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: atomic_global_load_saddr_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_load_b32 v0, v0, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog ; ; GFX12-LABEL: atomic_global_load_saddr_i32: @@ -3612,16 +3612,16 @@ define amdgpu_ps float @atomic_global_load_saddr_i32_immneg128(ptr addrspace(1) ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] offset:-128 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: atomic_global_load_saddr_i32_immneg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_load_b32 v0, v0, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog ; ; GFX12-LABEL: atomic_global_load_saddr_i32_immneg128: @@ -3650,16 +3650,16 @@ define amdgpu_ps <2 x float> @atomic_global_load_saddr_i64(ptr addrspace(1) inre ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: atomic_global_load_saddr_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_load_b64 v[0:1], v0, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog ; ; GFX12-LABEL: atomic_global_load_saddr_i64: @@ -3687,16 +3687,16 @@ define amdgpu_ps <2 x float> @atomic_global_load_saddr_i64_immneg128(ptr addrspa ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] offset:-128 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: buffer_gl1_inv +; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: atomic_global_load_saddr_i64_immneg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: buffer_gl1_inv +; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: ; return to shader part epilog ; ; GFX12-LABEL: atomic_global_load_saddr_i64_immneg128: diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll index 833aba9..9e336a7 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll @@ -23,8 +23,8 @@ define fastcc i32 @foo() { ; CHECK-NEXT: $sgpr17 = S_ADDC_U32 internal $sgpr17, target-flags(amdgpu-gotprel32-hi) @bar + 12, implicit-def $scc, implicit internal $scc ; CHECK-NEXT: } ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; CHECK-NEXT: BUFFER_GL0_INV implicit $exec ; CHECK-NEXT: BUFFER_GL1_INV implicit $exec + ; CHECK-NEXT: BUFFER_GL0_INV implicit $exec ; CHECK-NEXT: renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, killed $vgpr40 ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, killed $vgpr40 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll index 89ccf97..77962fa 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll @@ -1224,14 +1224,14 @@ define amdgpu_kernel void @agent_acquire_fence() { ; ; GFX10-WGP-LABEL: agent_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_acquire_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_acquire_fence: @@ -1260,14 +1260,14 @@ define amdgpu_kernel void @agent_acquire_fence() { ; ; GFX11-WGP-LABEL: agent_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("agent") acquire @@ -1338,14 +1338,14 @@ define amdgpu_kernel void @agent_acq_rel_fence() { ; ; GFX10-WGP-LABEL: agent_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence: @@ -1376,14 +1376,14 @@ define amdgpu_kernel void @agent_acq_rel_fence() { ; ; GFX11-WGP-LABEL: agent_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("agent") acq_rel @@ -1403,14 +1403,14 @@ define amdgpu_kernel void @agent_seq_cst_fence() { ; ; GFX10-WGP-LABEL: agent_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence: @@ -1441,14 +1441,14 @@ define amdgpu_kernel void @agent_seq_cst_fence() { ; ; GFX11-WGP-LABEL: agent_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("agent") seq_cst @@ -1468,14 +1468,14 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() { ; ; GFX10-WGP-LABEL: agent_one_as_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_one_as_acquire_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_one_as_acquire_fence: @@ -1504,14 +1504,14 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() { ; ; GFX11-WGP-LABEL: agent_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_one_as_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("agent-one-as") acquire @@ -1582,14 +1582,14 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() { ; ; GFX10-WGP-LABEL: agent_one_as_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_one_as_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_one_as_acq_rel_fence: @@ -1620,14 +1620,14 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() { ; ; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_one_as_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("agent-one-as") acq_rel @@ -1647,14 +1647,14 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() { ; ; GFX10-WGP-LABEL: agent_one_as_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_one_as_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_one_as_seq_cst_fence: @@ -1685,14 +1685,14 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() { ; ; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_one_as_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("agent-one-as") seq_cst @@ -1712,14 +1712,14 @@ define amdgpu_kernel void @system_acquire_fence() { ; ; GFX10-WGP-LABEL: system_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_acquire_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_acquire_fence: @@ -1750,14 +1750,14 @@ define amdgpu_kernel void @system_acquire_fence() { ; ; GFX11-WGP-LABEL: system_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence acquire @@ -1830,14 +1830,14 @@ define amdgpu_kernel void @system_acq_rel_fence() { ; ; GFX10-WGP-LABEL: system_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_acq_rel_fence: @@ -1872,14 +1872,14 @@ define amdgpu_kernel void @system_acq_rel_fence() { ; ; GFX11-WGP-LABEL: system_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence acq_rel @@ -1899,14 +1899,14 @@ define amdgpu_kernel void @system_seq_cst_fence() { ; ; GFX10-WGP-LABEL: system_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_seq_cst_fence: @@ -1941,14 +1941,14 @@ define amdgpu_kernel void @system_seq_cst_fence() { ; ; GFX11-WGP-LABEL: system_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence seq_cst @@ -1968,14 +1968,14 @@ define amdgpu_kernel void @system_one_as_acquire_fence() { ; ; GFX10-WGP-LABEL: system_one_as_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_one_as_acquire_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_one_as_acquire_fence: @@ -2006,14 +2006,14 @@ define amdgpu_kernel void @system_one_as_acquire_fence() { ; ; GFX11-WGP-LABEL: system_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_one_as_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("one-as") acquire @@ -2086,14 +2086,14 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() { ; ; GFX10-WGP-LABEL: system_one_as_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_one_as_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_one_as_acq_rel_fence: @@ -2128,14 +2128,14 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() { ; ; GFX11-WGP-LABEL: system_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_one_as_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("one-as") acq_rel @@ -2155,14 +2155,14 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() { ; ; GFX10-WGP-LABEL: system_one_as_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_one_as_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_one_as_seq_cst_fence: @@ -2197,14 +2197,14 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() { ; ; GFX11-WGP-LABEL: system_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_one_as_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("one-as") seq_cst diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll index d6c759f..d9f5e64 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -299,8 +299,8 @@ define amdgpu_kernel void @flat_agent_acquire_load( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -314,8 +314,8 @@ define amdgpu_kernel void @flat_agent_acquire_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -397,8 +397,8 @@ define amdgpu_kernel void @flat_agent_acquire_load( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -410,8 +410,8 @@ define amdgpu_kernel void @flat_agent_acquire_load( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -445,8 +445,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -460,8 +460,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -543,8 +543,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -556,8 +556,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -1168,8 +1168,8 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw( ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_acquire_atomicrmw: @@ -1184,8 +1184,8 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw( ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_atomicrmw: @@ -1259,8 +1259,8 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw( ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acquire_atomicrmw: @@ -1274,8 +1274,8 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1425,8 +1425,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw( ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_acq_rel_atomicrmw: @@ -1441,8 +1441,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw( ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_atomicrmw: @@ -1518,8 +1518,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw( ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acq_rel_atomicrmw: @@ -1533,8 +1533,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1568,8 +1568,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw( ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_seq_cst_atomicrmw: @@ -1584,8 +1584,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw( ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_atomicrmw: @@ -1661,8 +1661,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw( ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_atomicrmw: @@ -1676,8 +1676,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1711,8 +1711,8 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1727,8 +1727,8 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1807,8 +1807,8 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1822,8 +1822,8 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -1859,8 +1859,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1875,8 +1875,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1957,8 +1957,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1972,8 +1972,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -2009,8 +2009,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -2025,8 +2025,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -2107,8 +2107,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -2122,8 +2122,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -2275,8 +2275,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_acquire_monotonic_cmpxchg: @@ -2292,8 +2292,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_monotonic_cmpxchg: @@ -2363,8 +2363,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acquire_monotonic_cmpxchg: @@ -2376,8 +2376,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2530,8 +2530,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: @@ -2547,8 +2547,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: @@ -2620,8 +2620,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: @@ -2633,8 +2633,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2672,8 +2672,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: @@ -2689,8 +2689,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: @@ -2762,8 +2762,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: @@ -2775,8 +2775,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2814,8 +2814,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_monotonic_acquire_cmpxchg: @@ -2831,8 +2831,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_acquire_cmpxchg: @@ -2902,8 +2902,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_monotonic_acquire_cmpxchg: @@ -2915,8 +2915,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2954,8 +2954,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_acquire_acquire_cmpxchg: @@ -2971,8 +2971,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_acquire_cmpxchg: @@ -3042,8 +3042,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acquire_acquire_cmpxchg: @@ -3055,8 +3055,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3094,8 +3094,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_release_acquire_cmpxchg: @@ -3111,8 +3111,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_acquire_cmpxchg: @@ -3184,8 +3184,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_release_acquire_cmpxchg: @@ -3197,8 +3197,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3236,8 +3236,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_acq_rel_acquire_cmpxchg: @@ -3253,8 +3253,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_acquire_cmpxchg: @@ -3326,8 +3326,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acq_rel_acquire_cmpxchg: @@ -3339,8 +3339,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3378,8 +3378,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_seq_cst_acquire_cmpxchg: @@ -3395,8 +3395,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_acquire_cmpxchg: @@ -3468,8 +3468,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_acquire_cmpxchg: @@ -3481,8 +3481,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3520,8 +3520,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: @@ -3537,8 +3537,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: @@ -3610,8 +3610,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: @@ -3623,8 +3623,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3662,8 +3662,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_acquire_seq_cst_cmpxchg: @@ -3679,8 +3679,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_seq_cst_cmpxchg: @@ -3752,8 +3752,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acquire_seq_cst_cmpxchg: @@ -3765,8 +3765,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3804,8 +3804,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_release_seq_cst_cmpxchg: @@ -3821,8 +3821,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_seq_cst_cmpxchg: @@ -3894,8 +3894,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_release_seq_cst_cmpxchg: @@ -3907,8 +3907,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3946,8 +3946,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: @@ -3963,8 +3963,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: @@ -4036,8 +4036,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: @@ -4049,8 +4049,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4088,8 +4088,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: @@ -4105,8 +4105,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: @@ -4178,8 +4178,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: @@ -4191,8 +4191,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4375,8 +4375,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -4394,8 +4394,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -4474,8 +4474,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -4487,8 +4487,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -4676,8 +4676,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -4695,8 +4695,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -4777,8 +4777,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -4790,8 +4790,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -4834,8 +4834,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -4853,8 +4853,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -4935,8 +4935,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -4948,8 +4948,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -4992,8 +4992,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5011,8 +5011,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5091,8 +5091,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5104,8 +5104,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5148,8 +5148,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5167,8 +5167,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5247,8 +5247,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5260,8 +5260,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5304,8 +5304,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5323,8 +5323,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5405,8 +5405,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5418,8 +5418,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5462,8 +5462,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5481,8 +5481,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5563,8 +5563,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5576,8 +5576,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5620,8 +5620,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5639,8 +5639,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5721,8 +5721,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5734,8 +5734,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5778,8 +5778,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5797,8 +5797,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5879,8 +5879,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5892,8 +5892,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5936,8 +5936,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5955,8 +5955,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -6037,8 +6037,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6050,8 +6050,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6094,8 +6094,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -6113,8 +6113,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -6195,8 +6195,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6208,8 +6208,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6252,8 +6252,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -6271,8 +6271,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -6353,8 +6353,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6366,8 +6366,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6410,8 +6410,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -6429,8 +6429,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -6511,8 +6511,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6524,8 +6524,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6827,8 +6827,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -6843,8 +6843,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -6930,8 +6930,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -6944,8 +6944,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -6981,8 +6981,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -6997,8 +6997,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -7084,8 +7084,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -7098,8 +7098,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -7710,8 +7710,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_acquire_atomicrmw: @@ -7725,8 +7725,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_atomicrmw: @@ -7799,8 +7799,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acquire_atomicrmw: @@ -7813,8 +7813,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7963,8 +7963,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_acq_rel_atomicrmw: @@ -7978,8 +7978,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_atomicrmw: @@ -8054,8 +8054,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acq_rel_atomicrmw: @@ -8068,8 +8068,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8102,8 +8102,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_seq_cst_atomicrmw: @@ -8117,8 +8117,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_atomicrmw: @@ -8193,8 +8193,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_atomicrmw: @@ -8207,8 +8207,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8243,8 +8243,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm @@ -8260,8 +8260,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm @@ -8343,8 +8343,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -8359,8 +8359,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -8398,8 +8398,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm @@ -8415,8 +8415,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm @@ -8500,8 +8500,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -8516,8 +8516,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -8555,8 +8555,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm @@ -8572,8 +8572,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm @@ -8657,8 +8657,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -8673,8 +8673,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -8826,8 +8826,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: @@ -8842,8 +8842,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: @@ -8912,8 +8912,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: @@ -8924,8 +8924,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9077,8 +9077,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: @@ -9093,8 +9093,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: @@ -9165,8 +9165,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: @@ -9177,8 +9177,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9215,8 +9215,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: @@ -9231,8 +9231,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: @@ -9303,8 +9303,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: @@ -9315,8 +9315,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9353,8 +9353,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: @@ -9369,8 +9369,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: @@ -9439,8 +9439,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: @@ -9451,8 +9451,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9489,8 +9489,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: @@ -9505,8 +9505,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: @@ -9575,8 +9575,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: @@ -9587,8 +9587,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9625,8 +9625,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_release_acquire_cmpxchg: @@ -9641,8 +9641,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_acquire_cmpxchg: @@ -9713,8 +9713,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_release_acquire_cmpxchg: @@ -9725,8 +9725,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9763,8 +9763,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: @@ -9779,8 +9779,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: @@ -9851,8 +9851,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: @@ -9863,8 +9863,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9901,8 +9901,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: @@ -9917,8 +9917,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: @@ -9989,8 +9989,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: @@ -10001,8 +10001,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10039,8 +10039,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: @@ -10055,8 +10055,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: @@ -10127,8 +10127,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: @@ -10139,8 +10139,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10177,8 +10177,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: @@ -10193,8 +10193,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: @@ -10265,8 +10265,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: @@ -10277,8 +10277,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10315,8 +10315,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg: @@ -10331,8 +10331,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg: @@ -10403,8 +10403,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg: @@ -10415,8 +10415,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10453,8 +10453,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: @@ -10469,8 +10469,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: @@ -10541,8 +10541,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: @@ -10553,8 +10553,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10591,8 +10591,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: @@ -10607,8 +10607,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: @@ -10679,8 +10679,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: @@ -10691,8 +10691,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10876,8 +10876,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -10896,8 +10896,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -10980,8 +10980,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -10994,8 +10994,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -11185,8 +11185,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11205,8 +11205,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -11291,8 +11291,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -11305,8 +11305,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -11351,8 +11351,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11371,8 +11371,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -11457,8 +11457,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -11471,8 +11471,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -11517,8 +11517,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11537,8 +11537,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -11621,8 +11621,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -11635,8 +11635,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -11681,8 +11681,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11701,8 +11701,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -11785,8 +11785,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -11799,8 +11799,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -11845,8 +11845,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11865,8 +11865,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -11951,8 +11951,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -11965,8 +11965,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12011,8 +12011,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12031,8 +12031,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12117,8 +12117,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12131,8 +12131,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12177,8 +12177,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12197,8 +12197,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12283,8 +12283,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12297,8 +12297,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12343,8 +12343,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12363,8 +12363,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12449,8 +12449,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12463,8 +12463,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12509,8 +12509,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12529,8 +12529,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12615,8 +12615,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12629,8 +12629,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12675,8 +12675,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12695,8 +12695,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12781,8 +12781,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12795,8 +12795,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12841,8 +12841,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12861,8 +12861,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12947,8 +12947,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12961,8 +12961,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -13007,8 +13007,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -13027,8 +13027,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -13113,8 +13113,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -13127,8 +13127,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll index 461fe8b1..372f9ad 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -299,8 +299,8 @@ define amdgpu_kernel void @flat_system_acquire_load( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -314,8 +314,8 @@ define amdgpu_kernel void @flat_system_acquire_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -399,8 +399,8 @@ define amdgpu_kernel void @flat_system_acquire_load( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -412,8 +412,8 @@ define amdgpu_kernel void @flat_system_acquire_load( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -447,8 +447,8 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -462,8 +462,8 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -547,8 +547,8 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -560,8 +560,8 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -1176,8 +1176,8 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_acquire_atomicrmw: @@ -1192,8 +1192,8 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_atomicrmw: @@ -1269,8 +1269,8 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acquire_atomicrmw: @@ -1284,8 +1284,8 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1437,8 +1437,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_acq_rel_atomicrmw: @@ -1453,8 +1453,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_atomicrmw: @@ -1534,8 +1534,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acq_rel_atomicrmw: @@ -1549,8 +1549,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1584,8 +1584,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_seq_cst_atomicrmw: @@ -1600,8 +1600,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_atomicrmw: @@ -1681,8 +1681,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_atomicrmw: @@ -1696,8 +1696,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1731,8 +1731,8 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1747,8 +1747,8 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1829,8 +1829,8 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1844,8 +1844,8 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -1881,8 +1881,8 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1897,8 +1897,8 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1983,8 +1983,8 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1998,8 +1998,8 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -2035,8 +2035,8 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -2051,8 +2051,8 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -2137,8 +2137,8 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -2152,8 +2152,8 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -2305,8 +2305,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_acquire_monotonic_cmpxchg: @@ -2322,8 +2322,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_monotonic_cmpxchg: @@ -2395,8 +2395,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acquire_monotonic_cmpxchg: @@ -2408,8 +2408,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2564,8 +2564,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_acq_rel_monotonic_cmpxchg: @@ -2581,8 +2581,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_monotonic_cmpxchg: @@ -2658,8 +2658,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acq_rel_monotonic_cmpxchg: @@ -2671,8 +2671,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2710,8 +2710,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_seq_cst_monotonic_cmpxchg: @@ -2727,8 +2727,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_monotonic_cmpxchg: @@ -2804,8 +2804,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_monotonic_cmpxchg: @@ -2817,8 +2817,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2856,8 +2856,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_monotonic_acquire_cmpxchg: @@ -2873,8 +2873,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_acquire_cmpxchg: @@ -2946,8 +2946,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_monotonic_acquire_cmpxchg: @@ -2959,8 +2959,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2998,8 +2998,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_acquire_acquire_cmpxchg: @@ -3015,8 +3015,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_acquire_cmpxchg: @@ -3088,8 +3088,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acquire_acquire_cmpxchg: @@ -3101,8 +3101,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3140,8 +3140,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_release_acquire_cmpxchg: @@ -3157,8 +3157,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_release_acquire_cmpxchg: @@ -3234,8 +3234,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_release_acquire_cmpxchg: @@ -3247,8 +3247,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3286,8 +3286,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_acq_rel_acquire_cmpxchg: @@ -3303,8 +3303,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_acquire_cmpxchg: @@ -3380,8 +3380,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acq_rel_acquire_cmpxchg: @@ -3393,8 +3393,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3432,8 +3432,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_seq_cst_acquire_cmpxchg: @@ -3449,8 +3449,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_acquire_cmpxchg: @@ -3526,8 +3526,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_acquire_cmpxchg: @@ -3539,8 +3539,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3578,8 +3578,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_monotonic_seq_cst_cmpxchg: @@ -3595,8 +3595,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_seq_cst_cmpxchg: @@ -3672,8 +3672,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_monotonic_seq_cst_cmpxchg: @@ -3685,8 +3685,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3724,8 +3724,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_acquire_seq_cst_cmpxchg: @@ -3741,8 +3741,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_seq_cst_cmpxchg: @@ -3818,8 +3818,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acquire_seq_cst_cmpxchg: @@ -3831,8 +3831,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3870,8 +3870,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_release_seq_cst_cmpxchg: @@ -3887,8 +3887,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_release_seq_cst_cmpxchg: @@ -3964,8 +3964,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_release_seq_cst_cmpxchg: @@ -3977,8 +3977,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4016,8 +4016,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: @@ -4033,8 +4033,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: @@ -4110,8 +4110,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: @@ -4123,8 +4123,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4162,8 +4162,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: @@ -4179,8 +4179,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: @@ -4256,8 +4256,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: @@ -4269,8 +4269,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4453,8 +4453,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -4472,8 +4472,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -4554,8 +4554,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -4567,8 +4567,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -4758,8 +4758,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -4777,8 +4777,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -4863,8 +4863,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -4876,8 +4876,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -4920,8 +4920,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -4939,8 +4939,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5025,8 +5025,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5038,8 +5038,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5082,8 +5082,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5101,8 +5101,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5183,8 +5183,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5196,8 +5196,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5240,8 +5240,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5259,8 +5259,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5341,8 +5341,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5354,8 +5354,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5398,8 +5398,8 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5417,8 +5417,8 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5503,8 +5503,8 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5516,8 +5516,8 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5560,8 +5560,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5579,8 +5579,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5665,8 +5665,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5678,8 +5678,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5722,8 +5722,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5741,8 +5741,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5827,8 +5827,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -5840,8 +5840,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -5884,8 +5884,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -5903,8 +5903,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -5989,8 +5989,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6002,8 +6002,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6046,8 +6046,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -6065,8 +6065,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -6151,8 +6151,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6164,8 +6164,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6208,8 +6208,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -6227,8 +6227,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -6313,8 +6313,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6326,8 +6326,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6370,8 +6370,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -6389,8 +6389,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -6475,8 +6475,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6488,8 +6488,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6532,8 +6532,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 @@ -6551,8 +6551,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -6637,8 +6637,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6650,8 +6650,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -6953,8 +6953,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -6969,8 +6969,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -7058,8 +7058,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -7072,8 +7072,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -7109,8 +7109,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -7125,8 +7125,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -7214,8 +7214,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -7228,8 +7228,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -7844,8 +7844,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_acquire_atomicrmw: @@ -7859,8 +7859,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_atomicrmw: @@ -7935,8 +7935,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acquire_atomicrmw: @@ -7949,8 +7949,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8101,8 +8101,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_acq_rel_atomicrmw: @@ -8116,8 +8116,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_atomicrmw: @@ -8196,8 +8196,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acq_rel_atomicrmw: @@ -8210,8 +8210,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8244,8 +8244,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_seq_cst_atomicrmw: @@ -8259,8 +8259,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_atomicrmw: @@ -8339,8 +8339,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_atomicrmw: @@ -8353,8 +8353,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8389,8 +8389,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm @@ -8406,8 +8406,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm @@ -8491,8 +8491,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -8507,8 +8507,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -8546,8 +8546,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm @@ -8563,8 +8563,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm @@ -8652,8 +8652,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -8668,8 +8668,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -8707,8 +8707,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm @@ -8724,8 +8724,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm @@ -8813,8 +8813,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -8829,8 +8829,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -8982,8 +8982,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: @@ -8998,8 +8998,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: @@ -9070,8 +9070,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: @@ -9082,8 +9082,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9237,8 +9237,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: @@ -9253,8 +9253,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: @@ -9329,8 +9329,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: @@ -9341,8 +9341,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9379,8 +9379,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: @@ -9395,8 +9395,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: @@ -9471,8 +9471,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: @@ -9483,8 +9483,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9521,8 +9521,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: @@ -9537,8 +9537,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: @@ -9609,8 +9609,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: @@ -9621,8 +9621,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9659,8 +9659,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: @@ -9675,8 +9675,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: @@ -9747,8 +9747,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: @@ -9759,8 +9759,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9797,8 +9797,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_release_acquire_cmpxchg: @@ -9813,8 +9813,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_acquire_cmpxchg: @@ -9889,8 +9889,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_release_acquire_cmpxchg: @@ -9901,8 +9901,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9939,8 +9939,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: @@ -9955,8 +9955,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: @@ -10031,8 +10031,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: @@ -10043,8 +10043,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10081,8 +10081,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: @@ -10097,8 +10097,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: @@ -10173,8 +10173,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: @@ -10185,8 +10185,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10223,8 +10223,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: @@ -10239,8 +10239,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: @@ -10315,8 +10315,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: @@ -10327,8 +10327,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10365,8 +10365,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: @@ -10381,8 +10381,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: @@ -10457,8 +10457,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: @@ -10469,8 +10469,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10507,8 +10507,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: @@ -10523,8 +10523,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: @@ -10599,8 +10599,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: @@ -10611,8 +10611,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10649,8 +10649,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: @@ -10665,8 +10665,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: @@ -10741,8 +10741,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: @@ -10753,8 +10753,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10791,8 +10791,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: @@ -10807,8 +10807,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: @@ -10883,8 +10883,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: @@ -10895,8 +10895,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11080,8 +11080,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11100,8 +11100,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -11186,8 +11186,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -11200,8 +11200,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -11393,8 +11393,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11413,8 +11413,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -11503,8 +11503,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -11517,8 +11517,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -11563,8 +11563,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11583,8 +11583,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -11673,8 +11673,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -11687,8 +11687,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -11733,8 +11733,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11753,8 +11753,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -11839,8 +11839,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -11853,8 +11853,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -11899,8 +11899,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -11919,8 +11919,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12005,8 +12005,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12019,8 +12019,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12065,8 +12065,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12085,8 +12085,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12175,8 +12175,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12189,8 +12189,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12235,8 +12235,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12255,8 +12255,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12345,8 +12345,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12359,8 +12359,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12405,8 +12405,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12425,8 +12425,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12515,8 +12515,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12529,8 +12529,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12575,8 +12575,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12595,8 +12595,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12685,8 +12685,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12699,8 +12699,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12745,8 +12745,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12765,8 +12765,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -12855,8 +12855,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -12869,8 +12869,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -12915,8 +12915,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -12935,8 +12935,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -13025,8 +13025,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -13039,8 +13039,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -13085,8 +13085,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -13105,8 +13105,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -13195,8 +13195,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -13209,8 +13209,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm @@ -13255,8 +13255,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) @@ -13275,8 +13275,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) @@ -13365,8 +13365,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm @@ -13379,8 +13379,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll index 0a7a07e..0449e41 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll @@ -317,8 +317,8 @@ define amdgpu_kernel void @global_agent_acquire_load( ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -329,8 +329,8 @@ define amdgpu_kernel void @global_agent_acquire_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -400,8 +400,8 @@ define amdgpu_kernel void @global_agent_acquire_load( ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -414,8 +414,8 @@ define amdgpu_kernel void @global_agent_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -465,8 +465,8 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -477,8 +477,8 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -548,8 +548,8 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -562,8 +562,8 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1241,8 +1241,8 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_acquire_atomicrmw: @@ -1255,8 +1255,8 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_atomicrmw: @@ -1328,8 +1328,8 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_acquire_atomicrmw: @@ -1341,8 +1341,8 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -1514,8 +1514,8 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_acq_rel_atomicrmw: @@ -1528,8 +1528,8 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_atomicrmw: @@ -1603,8 +1603,8 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_acq_rel_atomicrmw: @@ -1616,8 +1616,8 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -1662,8 +1662,8 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_seq_cst_atomicrmw: @@ -1676,8 +1676,8 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_atomicrmw: @@ -1751,8 +1751,8 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_seq_cst_atomicrmw: @@ -1764,8 +1764,8 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -1812,8 +1812,8 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -1827,8 +1827,8 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1906,8 +1906,8 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1922,8 +1922,8 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1974,8 +1974,8 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -1989,8 +1989,8 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -2070,8 +2070,8 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2086,8 +2086,8 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2138,8 +2138,8 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -2153,8 +2153,8 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -2234,8 +2234,8 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2250,8 +2250,8 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2427,8 +2427,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_acquire_monotonic_cmpxchg: @@ -2440,8 +2440,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_cmpxchg: @@ -2510,8 +2510,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_acquire_monotonic_cmpxchg: @@ -2522,8 +2522,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -2698,8 +2698,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg: @@ -2711,8 +2711,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_cmpxchg: @@ -2783,8 +2783,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg: @@ -2795,8 +2795,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -2845,8 +2845,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg: @@ -2858,8 +2858,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_cmpxchg: @@ -2930,8 +2930,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg: @@ -2942,8 +2942,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -2992,8 +2992,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_monotonic_acquire_cmpxchg: @@ -3005,8 +3005,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_cmpxchg: @@ -3075,8 +3075,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_monotonic_acquire_cmpxchg: @@ -3087,8 +3087,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3137,8 +3137,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_acquire_acquire_cmpxchg: @@ -3150,8 +3150,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_cmpxchg: @@ -3220,8 +3220,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_acquire_acquire_cmpxchg: @@ -3232,8 +3232,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3282,8 +3282,8 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_release_acquire_cmpxchg: @@ -3295,8 +3295,8 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_cmpxchg: @@ -3367,8 +3367,8 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_release_acquire_cmpxchg: @@ -3379,8 +3379,8 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3429,8 +3429,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg: @@ -3442,8 +3442,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_cmpxchg: @@ -3514,8 +3514,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg: @@ -3526,8 +3526,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3576,8 +3576,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg: @@ -3589,8 +3589,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_cmpxchg: @@ -3661,8 +3661,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg: @@ -3673,8 +3673,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3723,8 +3723,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg: @@ -3736,8 +3736,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_cmpxchg: @@ -3808,8 +3808,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg: @@ -3820,8 +3820,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3870,8 +3870,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg: @@ -3883,8 +3883,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_cmpxchg: @@ -3955,8 +3955,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg: @@ -3967,8 +3967,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -4017,8 +4017,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_release_seq_cst_cmpxchg: @@ -4030,8 +4030,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_cmpxchg: @@ -4102,8 +4102,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_release_seq_cst_cmpxchg: @@ -4114,8 +4114,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -4164,8 +4164,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: @@ -4177,8 +4177,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: @@ -4249,8 +4249,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: @@ -4261,8 +4261,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -4311,8 +4311,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: @@ -4324,8 +4324,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: @@ -4396,8 +4396,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: @@ -4408,8 +4408,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -4612,8 +4612,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -4626,8 +4626,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -4702,8 +4702,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4717,8 +4717,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4928,8 +4928,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -4942,8 +4942,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5020,8 +5020,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5035,8 +5035,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5094,8 +5094,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5108,8 +5108,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5186,8 +5186,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5201,8 +5201,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5260,8 +5260,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5274,8 +5274,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5350,8 +5350,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5365,8 +5365,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5424,8 +5424,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5438,8 +5438,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5514,8 +5514,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5529,8 +5529,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5588,8 +5588,8 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5602,8 +5602,8 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5680,8 +5680,8 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5695,8 +5695,8 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5754,8 +5754,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5768,8 +5768,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5846,8 +5846,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5861,8 +5861,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5920,8 +5920,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5934,8 +5934,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -6012,8 +6012,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6027,8 +6027,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6086,8 +6086,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -6100,8 +6100,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -6178,8 +6178,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6193,8 +6193,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6252,8 +6252,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -6266,8 +6266,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -6344,8 +6344,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6359,8 +6359,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6418,8 +6418,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -6432,8 +6432,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -6510,8 +6510,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6525,8 +6525,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6584,8 +6584,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -6598,8 +6598,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -6676,8 +6676,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6691,8 +6691,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6750,8 +6750,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -6764,8 +6764,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -6842,8 +6842,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6857,8 +6857,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -7178,8 +7178,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load( ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -7190,8 +7190,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -7261,8 +7261,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load( ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -7275,8 +7275,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -7326,8 +7326,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -7338,8 +7338,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -7409,8 +7409,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -7423,8 +7423,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8102,8 +8102,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_acquire_atomicrmw: @@ -8116,8 +8116,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_atomicrmw: @@ -8189,8 +8189,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_acquire_atomicrmw: @@ -8202,8 +8202,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -8375,8 +8375,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw: @@ -8389,8 +8389,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_atomicrmw: @@ -8464,8 +8464,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw: @@ -8477,8 +8477,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -8523,8 +8523,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw: @@ -8537,8 +8537,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_atomicrmw: @@ -8612,8 +8612,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw: @@ -8625,8 +8625,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -8673,8 +8673,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8688,8 +8688,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -8767,8 +8767,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8783,8 +8783,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8835,8 +8835,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8850,8 +8850,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -8931,8 +8931,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8947,8 +8947,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8999,8 +8999,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -9014,8 +9014,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -9095,8 +9095,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -9111,8 +9111,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -9288,8 +9288,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: @@ -9301,8 +9301,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: @@ -9371,8 +9371,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: @@ -9383,8 +9383,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9559,8 +9559,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: @@ -9572,8 +9572,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: @@ -9644,8 +9644,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: @@ -9656,8 +9656,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9706,8 +9706,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: @@ -9719,8 +9719,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: @@ -9791,8 +9791,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: @@ -9803,8 +9803,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9853,8 +9853,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: @@ -9866,8 +9866,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: @@ -9936,8 +9936,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: @@ -9948,8 +9948,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9998,8 +9998,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: @@ -10011,8 +10011,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: @@ -10081,8 +10081,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: @@ -10093,8 +10093,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10143,8 +10143,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg: @@ -10156,8 +10156,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_cmpxchg: @@ -10228,8 +10228,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg: @@ -10240,8 +10240,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10290,8 +10290,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: @@ -10303,8 +10303,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: @@ -10375,8 +10375,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: @@ -10387,8 +10387,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10437,8 +10437,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: @@ -10450,8 +10450,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: @@ -10522,8 +10522,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: @@ -10534,8 +10534,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10584,8 +10584,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: @@ -10597,8 +10597,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: @@ -10669,8 +10669,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: @@ -10681,8 +10681,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10731,8 +10731,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: @@ -10744,8 +10744,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: @@ -10816,8 +10816,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: @@ -10828,8 +10828,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10878,8 +10878,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: @@ -10891,8 +10891,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: @@ -10963,8 +10963,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: @@ -10975,8 +10975,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -11025,8 +11025,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: @@ -11038,8 +11038,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: @@ -11110,8 +11110,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: @@ -11122,8 +11122,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -11172,8 +11172,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: @@ -11185,8 +11185,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: @@ -11257,8 +11257,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: @@ -11269,8 +11269,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -11473,8 +11473,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -11487,8 +11487,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -11563,8 +11563,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11578,8 +11578,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11637,8 +11637,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -11651,8 +11651,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -11729,8 +11729,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11744,8 +11744,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11803,8 +11803,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -11817,8 +11817,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -11895,8 +11895,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11910,8 +11910,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11969,8 +11969,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -11983,8 +11983,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12059,8 +12059,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12074,8 +12074,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12133,8 +12133,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12147,8 +12147,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12223,8 +12223,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12238,8 +12238,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12297,8 +12297,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12311,8 +12311,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12389,8 +12389,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12404,8 +12404,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12463,8 +12463,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12477,8 +12477,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12555,8 +12555,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12570,8 +12570,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12629,8 +12629,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12643,8 +12643,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12721,8 +12721,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12736,8 +12736,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12795,8 +12795,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12809,8 +12809,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12887,8 +12887,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12902,8 +12902,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12961,8 +12961,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12975,8 +12975,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -13053,8 +13053,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13068,8 +13068,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13127,8 +13127,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -13141,8 +13141,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -13219,8 +13219,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13234,8 +13234,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13293,8 +13293,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -13307,8 +13307,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -13385,8 +13385,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13400,8 +13400,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13459,8 +13459,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -13473,8 +13473,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -13551,8 +13551,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13566,8 +13566,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll index 97b2f08..f611003 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll @@ -317,8 +317,8 @@ define amdgpu_kernel void @global_system_acquire_load( ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -329,8 +329,8 @@ define amdgpu_kernel void @global_system_acquire_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -402,8 +402,8 @@ define amdgpu_kernel void @global_system_acquire_load( ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -416,8 +416,8 @@ define amdgpu_kernel void @global_system_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -467,8 +467,8 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -479,8 +479,8 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -552,8 +552,8 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -566,8 +566,8 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1249,8 +1249,8 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_acquire_atomicrmw: @@ -1263,8 +1263,8 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_atomicrmw: @@ -1338,8 +1338,8 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_acquire_atomicrmw: @@ -1351,8 +1351,8 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -1526,8 +1526,8 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_acq_rel_atomicrmw: @@ -1540,8 +1540,8 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_atomicrmw: @@ -1619,8 +1619,8 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_acq_rel_atomicrmw: @@ -1632,8 +1632,8 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -1678,8 +1678,8 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_seq_cst_atomicrmw: @@ -1692,8 +1692,8 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_atomicrmw: @@ -1771,8 +1771,8 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_seq_cst_atomicrmw: @@ -1784,8 +1784,8 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -1832,8 +1832,8 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -1847,8 +1847,8 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1928,8 +1928,8 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1944,8 +1944,8 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1996,8 +1996,8 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -2011,8 +2011,8 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -2096,8 +2096,8 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2112,8 +2112,8 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2164,8 +2164,8 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -2179,8 +2179,8 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -2264,8 +2264,8 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2280,8 +2280,8 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2457,8 +2457,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_acquire_monotonic_cmpxchg: @@ -2470,8 +2470,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_cmpxchg: @@ -2542,8 +2542,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_acquire_monotonic_cmpxchg: @@ -2554,8 +2554,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -2732,8 +2732,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg: @@ -2745,8 +2745,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_cmpxchg: @@ -2821,8 +2821,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg: @@ -2833,8 +2833,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -2883,8 +2883,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg: @@ -2896,8 +2896,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_cmpxchg: @@ -2972,8 +2972,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg: @@ -2984,8 +2984,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3034,8 +3034,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_monotonic_acquire_cmpxchg: @@ -3047,8 +3047,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_cmpxchg: @@ -3119,8 +3119,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_monotonic_acquire_cmpxchg: @@ -3131,8 +3131,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3181,8 +3181,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_acquire_acquire_cmpxchg: @@ -3194,8 +3194,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_cmpxchg: @@ -3266,8 +3266,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_acquire_acquire_cmpxchg: @@ -3278,8 +3278,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3328,8 +3328,8 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_release_acquire_cmpxchg: @@ -3341,8 +3341,8 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_release_acquire_cmpxchg: @@ -3417,8 +3417,8 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_release_acquire_cmpxchg: @@ -3429,8 +3429,8 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3479,8 +3479,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_acq_rel_acquire_cmpxchg: @@ -3492,8 +3492,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_cmpxchg: @@ -3568,8 +3568,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_acq_rel_acquire_cmpxchg: @@ -3580,8 +3580,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3630,8 +3630,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_seq_cst_acquire_cmpxchg: @@ -3643,8 +3643,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_cmpxchg: @@ -3719,8 +3719,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_seq_cst_acquire_cmpxchg: @@ -3731,8 +3731,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -3781,8 +3781,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg: @@ -3794,8 +3794,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_cmpxchg: @@ -3870,8 +3870,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg: @@ -3882,8 +3882,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -4086,8 +4086,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -4100,8 +4100,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -4178,8 +4178,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4193,8 +4193,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4252,8 +4252,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -4266,8 +4266,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -4348,8 +4348,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4363,8 +4363,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4422,8 +4422,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -4436,8 +4436,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -4518,8 +4518,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4533,8 +4533,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4592,8 +4592,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -4606,8 +4606,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -4684,8 +4684,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4699,8 +4699,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4758,8 +4758,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -4772,8 +4772,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -4850,8 +4850,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4865,8 +4865,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4924,8 +4924,8 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -4938,8 +4938,8 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5020,8 +5020,8 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5035,8 +5035,8 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5094,8 +5094,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5108,8 +5108,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5190,8 +5190,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5205,8 +5205,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5264,8 +5264,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5278,8 +5278,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5360,8 +5360,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5375,8 +5375,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5434,8 +5434,8 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5448,8 +5448,8 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5530,8 +5530,8 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5545,8 +5545,8 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5604,8 +5604,8 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5618,8 +5618,8 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5700,8 +5700,8 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5715,8 +5715,8 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5774,8 +5774,8 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5788,8 +5788,8 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -5870,8 +5870,8 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5885,8 +5885,8 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -5944,8 +5944,8 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -5958,8 +5958,8 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -6040,8 +6040,8 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6055,8 +6055,8 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6114,8 +6114,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -6128,8 +6128,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -6210,8 +6210,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6225,8 +6225,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6546,8 +6546,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -6558,8 +6558,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -6631,8 +6631,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6645,8 +6645,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6696,8 +6696,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -6708,8 +6708,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -6781,8 +6781,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -6795,8 +6795,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -7478,8 +7478,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_acquire_atomicrmw: @@ -7492,8 +7492,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_atomicrmw: @@ -7567,8 +7567,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_acquire_atomicrmw: @@ -7580,8 +7580,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -7755,8 +7755,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_acq_rel_atomicrmw: @@ -7769,8 +7769,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_atomicrmw: @@ -7848,8 +7848,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_acq_rel_atomicrmw: @@ -7861,8 +7861,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -7907,8 +7907,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_seq_cst_atomicrmw: @@ -7921,8 +7921,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_atomicrmw: @@ -8000,8 +8000,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_seq_cst_atomicrmw: @@ -8013,8 +8013,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in) { entry: @@ -8061,8 +8061,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8076,8 +8076,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -8157,8 +8157,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8173,8 +8173,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8225,8 +8225,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8240,8 +8240,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -8325,8 +8325,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8341,8 +8341,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8393,8 +8393,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8408,8 +8408,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -8493,8 +8493,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8509,8 +8509,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8686,8 +8686,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: @@ -8699,8 +8699,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: @@ -8771,8 +8771,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: @@ -8783,8 +8783,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -8961,8 +8961,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: @@ -8974,8 +8974,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: @@ -9050,8 +9050,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: @@ -9062,8 +9062,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9112,8 +9112,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: @@ -9125,8 +9125,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: @@ -9201,8 +9201,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: @@ -9213,8 +9213,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9263,8 +9263,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: @@ -9276,8 +9276,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: @@ -9348,8 +9348,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: @@ -9360,8 +9360,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9410,8 +9410,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg: @@ -9423,8 +9423,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_cmpxchg: @@ -9495,8 +9495,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg: @@ -9507,8 +9507,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9557,8 +9557,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_release_acquire_cmpxchg: @@ -9570,8 +9570,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_cmpxchg: @@ -9646,8 +9646,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_release_acquire_cmpxchg: @@ -9658,8 +9658,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9708,8 +9708,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: @@ -9721,8 +9721,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: @@ -9797,8 +9797,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: @@ -9809,8 +9809,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -9859,8 +9859,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: @@ -9872,8 +9872,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: @@ -9948,8 +9948,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: @@ -9960,8 +9960,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10010,8 +10010,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: @@ -10023,8 +10023,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: @@ -10099,8 +10099,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: @@ -10111,8 +10111,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10161,8 +10161,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: @@ -10174,8 +10174,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: @@ -10250,8 +10250,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: @@ -10262,8 +10262,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10312,8 +10312,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg: @@ -10325,8 +10325,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_cmpxchg: @@ -10401,8 +10401,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg: @@ -10413,8 +10413,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10463,8 +10463,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: @@ -10476,8 +10476,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: @@ -10552,8 +10552,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: @@ -10564,8 +10564,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10614,8 +10614,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: @@ -10627,8 +10627,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: @@ -10703,8 +10703,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: @@ -10715,8 +10715,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %out, i32 %in, i32 %old) { entry: @@ -10919,8 +10919,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -10933,8 +10933,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -11011,8 +11011,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11026,8 +11026,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11239,8 +11239,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -11253,8 +11253,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -11335,8 +11335,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11350,8 +11350,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11409,8 +11409,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -11423,8 +11423,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -11505,8 +11505,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11520,8 +11520,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11579,8 +11579,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -11593,8 +11593,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -11671,8 +11671,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11686,8 +11686,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11745,8 +11745,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -11759,8 +11759,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -11837,8 +11837,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11852,8 +11852,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -11911,8 +11911,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -11925,8 +11925,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12007,8 +12007,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12022,8 +12022,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12081,8 +12081,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12095,8 +12095,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12177,8 +12177,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12192,8 +12192,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12251,8 +12251,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12265,8 +12265,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12347,8 +12347,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12362,8 +12362,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12421,8 +12421,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12435,8 +12435,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12517,8 +12517,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12532,8 +12532,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12591,8 +12591,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12605,8 +12605,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12687,8 +12687,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12702,8 +12702,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12761,8 +12761,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12775,8 +12775,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -12857,8 +12857,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12872,8 +12872,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -12931,8 +12931,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -12945,8 +12945,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -13027,8 +13027,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13042,8 +13042,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13101,8 +13101,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv +; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -13115,8 +13115,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv +; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -13197,8 +13197,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv +; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -13212,8 +13212,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv +; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |