; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s

; Selection of `atomicrmw or` through GlobalISel with the new register-bank
; selection on gfx1200. Every function returns the value produced by the
; atomic, so each one is expected to select the returning form of the
; instruction.
;
; Metadata nodes used below:
;   !0 - empty marker node for !amdgpu.no.remote.memory and
;        !amdgpu.no.fine.grained.memory.
;   !1 - address-space range [5, 6) for !noalias.addrspace.

; =============================================================================
; atomicrmw or - generic address space (addrspace 0)
; =============================================================================

; 32-bit seq_cst OR on a flat pointer with the default sync scope; expected to
; select one flat_atomic_or_b32 bracketed by a system-scope write-back and
; invalidate.
define i32 @atomicrmw_or_i32_generic(ptr addrspace(0) %ptr, i32 %val) {
; GFX12-LABEL: atomicrmw_or_i32_generic:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_wb scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_storecnt 0x0
; GFX12-NEXT:    flat_atomic_or_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw or ptr addrspace(0) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
  ret i32 %result
}

; 64-bit monotonic, workgroup-scope OR on a flat pointer. The !noalias.addrspace
; range states the pointer is not in address space 5 (the private address space
; on AMDGPU); expected to select a single flat_atomic_or_b64 with no fences.
define i64 @atomicrmw_or_i64_generic(ptr addrspace(0) %ptr, i64 %val) {
; GFX12-LABEL: atomicrmw_or_i64_generic:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    flat_atomic_or_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw or ptr addrspace(0) %ptr, i64 %val syncscope("workgroup") monotonic, !noalias.addrspace !1
  ret i64 %result
}

; =============================================================================
; atomicrmw or - local address space (addrspace 3)
; =============================================================================

; 32-bit seq_cst OR on an addrspace(3) pointer; expected to select
; ds_or_rtn_b32.
define i32 @atomicrmw_or_i32_local(ptr addrspace(3) %ptr, i32 %val) {
; GFX12-LABEL: atomicrmw_or_i32_local:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_wait_storecnt 0x0
; GFX12-NEXT:    ds_or_rtn_b32 v0, v0, v1
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    global_inv scope:SCOPE_SE
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw or ptr addrspace(3) %ptr, i32 %val seq_cst
  ret i32 %result
}

; 64-bit seq_cst OR on an addrspace(3) pointer; expected to select
; ds_or_rtn_b64.
define i64 @atomicrmw_or_i64_local(ptr addrspace(3) %ptr, i64 %val) {
; GFX12-LABEL: atomicrmw_or_i64_local:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_wait_storecnt 0x0
; GFX12-NEXT:    ds_or_rtn_b64 v[0:1], v0, v[1:2]
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    global_inv scope:SCOPE_SE
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw or ptr addrspace(3) %ptr, i64 %val seq_cst
  ret i64 %result
}

; =============================================================================
; atomicrmw or with metadata - global address space (no expansion)
; =============================================================================

; 32-bit seq_cst OR on an addrspace(1) pointer tagged
; !amdgpu.no.remote.memory; expected to select global_atomic_or_b32.
define i32 @atomicrmw_or_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) {
; GFX12-LABEL: atomicrmw_or_i32_global_no_remote_memory:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_wb scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_storecnt 0x0
; GFX12-NEXT:    global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
  ret i32 %result
}

; 64-bit seq_cst OR on an addrspace(1) pointer tagged
; !amdgpu.no.remote.memory; expected to select global_atomic_or_b64.
define i64 @atomicrmw_or_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) {
; GFX12-LABEL: atomicrmw_or_i64_global_no_remote_memory:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_wb scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_storecnt 0x0
; GFX12-NEXT:    global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0
  ret i64 %result
}

; 32-bit seq_cst OR on an addrspace(1) pointer tagged
; !amdgpu.no.fine.grained.memory; expected to select global_atomic_or_b32.
define i32 @atomicrmw_or_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) {
; GFX12-LABEL: atomicrmw_or_i32_global_no_fine_grained_memory:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_wb scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_storecnt 0x0
; GFX12-NEXT:    global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i32 %result
}

; 64-bit seq_cst OR on an addrspace(1) pointer tagged
; !amdgpu.no.fine.grained.memory; expected to select global_atomic_or_b64.
define i64 @atomicrmw_or_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) {
; GFX12-LABEL: atomicrmw_or_i64_global_no_fine_grained_memory:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_wb scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_storecnt 0x0
; GFX12-NEXT:    global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0
  ret i64 %result
}

; Empty marker node: the AMDGPU marker attachments are documented with an
; empty node, so they no longer share the address-space range below.
!0 = !{}
; Half-open address-space range [5, 6) consumed by !noalias.addrspace.
!1 = !{i32 5, i32 6}