; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s

; Purpose: checks the gfx1200 code emitted for the llvm.amdgcn.s.barrier.*
; intrinsics, both on named-barrier globals and on a barrier pointer that
; arrives as a kernel argument. The two llc invocations above run the same IR
; with -global-isel=0 and -global-isel=1, each with its own assertion prefix.
;
; The assertion blocks inside each function are generated by the script named
; on the first line; regenerate them with that script instead of hand-editing.

; Three globals of target type "amdgcn.named.barrier" in addrspace(3).
; In the expected output below, the immediate used for @bar is 2, for @bar2
; is 1 and for @bar3 is 3.
; NOTE(review): how those numbers are assigned is decided outside this file.
@bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
@bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison

; func1: non-kernel function that joins @bar3, signals it with the i32
; operand 7, then waits with the i16 operand 1.
; Expected signal operand is 0x70003: the 7 sits above the low value 3 that
; the join uses. The SelectionDAG output passes the join operand through m0;
; the GlobalISel output uses an immediate.
define void @func1() {
; GFX12-SDAG-LABEL: func1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 3
; GFX12-SDAG-NEXT:    s_barrier_join m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0x70003
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_barrier_wait 1
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: func1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0x70003
; GFX12-GISEL-NEXT:    s_barrier_join 3
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_barrier_wait 1
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
  call void @llvm.amdgcn.s.barrier.wait(i16 1)
  ret void
}

; func2: same join / signal / wait sequence as func1, but on @bar2.
; Expected join operand is 1 and expected signal operand is 0x70001.
define void @func2() {
; GFX12-SDAG-LABEL: func2:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 1
; GFX12-SDAG-NEXT:    s_barrier_join m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0x70001
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_barrier_wait 1
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: func2:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0x70001
; GFX12-GISEL-NEXT:    s_barrier_join 1
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_barrier_wait 1
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
  call void @llvm.amdgcn.s.barrier.wait(i16 1)
  ret void
}

; kernel1: runs every barrier intrinsic declared at the end of this file,
; using both the global @bar and the pointer argument %in, and then calls
; func1 and func2.
;
; What the expected output shows:
;  * @bar with i32 12 becomes the constant 0xc0002 for init and signal.
;  * For %in with i32 9, a dword is loaded from s[4:5]+0x2c (s[12:13] in the
;    GlobalISel output), shifted right by 4, masked with 63 and OR'ed with
;    0x90000; that value feeds init and signal, and the masked value without
;    the OR feeds the join.
;    NOTE(review): the loaded dword is presumably the %in kernel argument;
;    confirm against the kernarg layout.
;  * The argument-less @llvm.amdgcn.s.barrier() call appears as the
;    "s_barrier_signal -1" / "s_barrier_wait -1" pair.
;  * %out and the results %isfirst, %state, %state2 and %state3 are never
;    used in the IR; only the emitted instructions are checked.
define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
; GFX12-SDAG-LABEL: kernel1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX12-SDAG-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0xc0002
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v31, v0
; GFX12-SDAG-NEXT:    s_barrier_init m0
; GFX12-SDAG-NEXT:    s_add_nc_u64 s[8:9], s[4:5], 48
; GFX12-SDAG-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX12-SDAG-NEXT:    s_mov_b32 s32, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_lshr_b32 s2, s2, 4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT:    s_and_b32 s2, s2, 63
; GFX12-SDAG-NEXT:    s_or_b32 s3, 0x90000, s2
; GFX12-SDAG-NEXT:    s_cmp_eq_u32 0, 0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, s3
; GFX12-SDAG-NEXT:    s_barrier_init m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0xc0002
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, s3
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, s2
; GFX12-SDAG-NEXT:    s_barrier_signal -1
; GFX12-SDAG-NEXT:    s_barrier_join m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 2
; GFX12-SDAG-NEXT:    s_barrier_signal_isfirst -1
; GFX12-SDAG-NEXT:    s_barrier_wait 1
; GFX12-SDAG-NEXT:    s_barrier_leave
; GFX12-SDAG-NEXT:    s_get_barrier_state s3, m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, s2
; GFX12-SDAG-NEXT:    s_get_barrier_state s2, m0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_getpc_b64 s[2:3]
; GFX12-SDAG-NEXT:    s_sext_i32_i16 s3, s3
; GFX12-SDAG-NEXT:    s_add_co_u32 s2, s2, func1@gotpcrel32@lo+8
; GFX12-SDAG-NEXT:    s_add_co_ci_u32 s3, s3, func1@gotpcrel32@hi+16
; GFX12-SDAG-NEXT:    s_barrier_signal -1
; GFX12-SDAG-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-SDAG-NEXT:    s_barrier_wait -1
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX12-SDAG-NEXT:    s_getpc_b64 s[2:3]
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_sext_i32_i16 s3, s3
; GFX12-SDAG-NEXT:    s_add_co_u32 s2, s2, func2@gotpcrel32@lo+12
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_add_co_ci_u32 s3, s3, func2@gotpcrel32@hi+24
; GFX12-SDAG-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX12-SDAG-NEXT:    s_get_barrier_state s0, -1
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: kernel1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_mov_b64 s[12:13], s[4:5]
; GFX12-GISEL-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX12-GISEL-NEXT:    s_load_b32 s0, s[12:13], 0x2c
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0xc0002
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v31, v0
; GFX12-GISEL-NEXT:    s_barrier_init m0
; GFX12-GISEL-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX12-GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX12-GISEL-NEXT:    s_mov_b32 s32, 0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_lshr_b32 s0, s0, 4
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    s_and_b32 s0, s0, 63
; GFX12-GISEL-NEXT:    s_or_b32 s1, s0, 0x90000
; GFX12-GISEL-NEXT:    s_cmp_eq_u32 0, 0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, s1
; GFX12-GISEL-NEXT:    s_barrier_init m0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0xc0002
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, s1
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, s0
; GFX12-GISEL-NEXT:    s_barrier_signal -1
; GFX12-GISEL-NEXT:    s_barrier_join m0
; GFX12-GISEL-NEXT:    s_barrier_signal_isfirst -1
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_add_co_u32 s8, s12, 48
; GFX12-GISEL-NEXT:    s_barrier_wait 1
; GFX12-GISEL-NEXT:    s_barrier_leave
; GFX12-GISEL-NEXT:    s_get_barrier_state s0, 2
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_get_barrier_state s0, m0
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s9, s13, 0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_getpc_b64 s[0:1]
; GFX12-GISEL-NEXT:    s_sext_i32_i16 s1, s1
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, func1@gotpcrel32@lo+8
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, func1@gotpcrel32@hi+16
; GFX12-GISEL-NEXT:    s_barrier_signal -1
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX12-GISEL-NEXT:    s_barrier_wait -1
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX12-GISEL-NEXT:    s_add_co_u32 s8, s12, 48
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s9, s13, 0
; GFX12-GISEL-NEXT:    s_getpc_b64 s[0:1]
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_sext_i32_i16 s1, s1
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, func2@gotpcrel32@lo+12
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, func2@gotpcrel32@hi+24
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX12-GISEL-NEXT:    s_get_barrier_state s0, -1
; GFX12-GISEL-NEXT:    s_endpgm
  ; init and signal, each first on the global and then on the pointer argument
  call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) @bar, i32 12)
  call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %in, i32 9)
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 12)
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %in, i32 9)
  ; immediate-operand forms mixed with a join on the pointer argument
  call void @llvm.amdgcn.s.barrier.signal(i32 -1)
  call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %in)
  %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
  call void @llvm.amdgcn.s.barrier.wait(i16 1)
  call void @llvm.amdgcn.s.barrier.leave(i16 1)
  ; state queries: global, pointer argument, and (after the calls) immediate -1
  %state = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) @bar)
  %state2 = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %in)
  call void @llvm.amdgcn.s.barrier()
  call void @func1()
  call void @func2()
  %state3 = call i32 @llvm.amdgcn.s.get.barrier.state(i32 -1)
  ret void
}

; kernel2: signals @bar with i32 7 before joining it (func1 and func2 join
; first), waits with the i16 operand 1, then calls func2.
; Expected signal operand is 0x70002 and expected join operand is 2.
; %out and %in are not used in the body.
define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
; GFX12-SDAG-LABEL: kernel2:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX12-SDAG-NEXT:    s_getpc_b64 s[6:7]
; GFX12-SDAG-NEXT:    s_sext_i32_i16 s7, s7
; GFX12-SDAG-NEXT:    s_add_co_u32 s6, s6, func2@gotpcrel32@lo+8
; GFX12-SDAG-NEXT:    s_add_co_ci_u32 s7, s7, func2@gotpcrel32@hi+16
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v31, v0
; GFX12-SDAG-NEXT:    s_load_b64 s[12:13], s[6:7], 0x0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0x70002
; GFX12-SDAG-NEXT:    s_add_nc_u64 s[8:9], s[4:5], 48
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 2
; GFX12-SDAG-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX12-SDAG-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX12-SDAG-NEXT:    s_mov_b32 s32, 0
; GFX12-SDAG-NEXT:    s_barrier_join m0
; GFX12-SDAG-NEXT:    s_barrier_wait 1
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[12:13]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: kernel2:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_add_co_u32 s8, s4, 48
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s9, s5, 0
; GFX12-GISEL-NEXT:    s_getpc_b64 s[4:5]
; GFX12-GISEL-NEXT:    s_sext_i32_i16 s5, s5
; GFX12-GISEL-NEXT:    s_add_co_u32 s4, s4, func2@gotpcrel32@lo+8
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s5, s5, func2@gotpcrel32@hi+16
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v31, v0
; GFX12-GISEL-NEXT:    s_load_b64 s[12:13], s[4:5], 0x0
; GFX12-GISEL-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0x70002
; GFX12-GISEL-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX12-GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX12-GISEL-NEXT:    s_mov_b32 s32, 0
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_barrier_join 2
; GFX12-GISEL-NEXT:    s_barrier_wait 1
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[12:13]
; GFX12-GISEL-NEXT:    s_endpgm
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 7)
  call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar)
  call void @llvm.amdgcn.s.barrier.wait(i16 1)
  call void @func2()
  ret void
}

; Declarations of the barrier intrinsics called above; every one carries
; attribute group #1.
declare void @llvm.amdgcn.s.barrier() #1
declare void @llvm.amdgcn.s.barrier.wait(i16) #1
declare void @llvm.amdgcn.s.barrier.signal(i32) #1
declare void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3), i32) #1
declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
declare void @llvm.amdgcn.s.barrier.init(ptr addrspace(3), i32) #1
declare void @llvm.amdgcn.s.barrier.join(ptr addrspace(3)) #1
declare void @llvm.amdgcn.s.barrier.leave(i16) #1
declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1
declare i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3)) #1

; #0 is attached to kernel1 and kernel2; #1 to the intrinsic declarations.
; NOTE(review): #2 is not referenced anywhere in the visible file - confirm
; whether it is still needed.
attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }
attributes #2 = { nounwind readnone }