; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=CHECK,CHECK-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=CHECK,CHECK-FAKE16 %s

; Make sure we use a stack pointer and allocate 112 * 4 bytes at the beginning of the stack.

; Frame base (s33) is conditionally set to 0x1c0 (= 112 * 4 bytes) based on a
; hardware-ID check; NOTE(review): presumably this reserves the per-wave
; scratch area described above when HW_REG_HW_ID2 bits [9:8] are non-zero.
define amdgpu_cs void @amdgpu_cs() #0 {
; CHECK-LABEL: amdgpu_cs:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-NEXT:    s_alloc_vgpr 0
; CHECK-NEXT:    s_endpgm
  ret void
}

; Kernels get the same conditional s33 = 0x1c0 setup as the compute shader above.
define amdgpu_kernel void @kernel() #0 {
; CHECK-LABEL: kernel:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-NEXT:    s_alloc_vgpr 0
; CHECK-NEXT:    s_endpgm
  ret void
}

; A stack object is addressed relative to s33, which is either 0 or 0x1c0
; depending on the hardware-ID check.
define amdgpu_cs void @with_local() #0 {
; CHECK-TRUE16-LABEL: with_local:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 13
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: with_local:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 13
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 13, ptr addrspace(5) %local
  ret void
}

; Check that we generate s_cselect for SP if we can fit
; the offset in an inline constant.
define amdgpu_cs void @with_calls_inline_const() #0 {
; CHECK-TRUE16-LABEL: with_calls_inline_const:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT:    s_cselect_b32 s32, 0x1d0, 16
; CHECK-TRUE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: with_calls_inline_const:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT:    s_cselect_b32 s32, 0x1d0, 16
; CHECK-FAKE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 15, ptr addrspace(5) %local
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

; Check that we generate s_mov + s_cmovk if we can't
; fit the offset for SP in an inline constant.
; 61 i32 stack slots push the SP offset (0x2c0) out of inline-constant range,
; so SP is materialized with s_movk_i32 + s_cmovk_i32 instead of s_cselect.
define amdgpu_cs void @with_calls_no_inline_const() #0 {
; CHECK-TRUE16-LABEL: with_calls_no_inline_const:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT:    s_movk_i32 s32, 0x100
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s32, 0x2c0
; CHECK-TRUE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: with_calls_no_inline_const:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT:    s_movk_i32 s32, 0x100
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s32, 0x2c0
; CHECK-FAKE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, i32 61, addrspace(5)
  store volatile i8 15, ptr addrspace(5) %local
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

; Inline asm clobbering v40/v42; the frame setup is unchanged from the
; no-stack-object case.  NOTE(review): the asm string "; spills" is only a
; marker — it emits no instructions.
define amdgpu_cs void @with_spills() #0 {
; CHECK-LABEL: with_spills:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-NEXT:    s_alloc_vgpr 0
; CHECK-NEXT:    s_endpgm
  call void asm "; spills", "~{v40},~{v42}"()
  ret void
}

; An overaligned (align 128) stack object: the conditional frame base becomes
; 0x200 (0x1c0 rounded up to 128-byte alignment) instead of 0x1c0.
define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 {
; CHECK-LABEL: realign_stack:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-NEXT:    s_cmovk_i32 s33, 0x200
; CHECK-NEXT:    s_movk_i32 s32, 0x100
; CHECK-NEXT:    s_clause 0x7
; CHECK-NEXT:    scratch_store_b128 off, v[28:31], s33 offset:112
; CHECK-NEXT:    scratch_store_b128 off, v[24:27], s33 offset:96
; CHECK-NEXT:    scratch_store_b128 off, v[20:23], s33 offset:80
; CHECK-NEXT:    scratch_store_b128 off, v[16:19], s33 offset:64
; CHECK-NEXT:    scratch_store_b128 off, v[12:15], s33 offset:48
; CHECK-NEXT:    scratch_store_b128 off, v[8:11], s33 offset:32
; CHECK-NEXT:    scratch_store_b128 off, v[4:7], s33 offset:16
; CHECK-NEXT:    scratch_store_b128 off, v[0:3], s33
; CHECK-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-NEXT:    s_cmovk_i32 s32, 0x300
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-NEXT:    s_alloc_vgpr 0
; CHECK-NEXT:    s_endpgm
  %v = alloca <32 x i32>, align 128, addrspace(5)
  store <32 x i32> %x, ptr addrspace(5) %v
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

; "frame-pointer"="none" (attribute #1): codegen matches @with_local.
define amdgpu_cs void @frame_pointer_none() #1 {
; CHECK-TRUE16-LABEL: frame_pointer_none:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 13
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: frame_pointer_none:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 13
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 13, ptr addrspace(5) %local
  ret void
}

; "frame-pointer"="all" (attribute #2): same generated code as
; @frame_pointer_none for this entry-point shader.
define amdgpu_cs void @frame_pointer_all() #2 {
; CHECK-TRUE16-LABEL: frame_pointer_all:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 13
; CHECK-TRUE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-TRUE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: frame_pointer_all:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 13
; CHECK-FAKE16-NEXT:    s_cmp_lg_u32 0, s33
; CHECK-FAKE16-NEXT:    s_cmovk_i32 s33, 0x1c0
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 13, ptr addrspace(5) %local
  ret void
}

; Non-entry functions and graphics shaders don't need to worry about CWSR.
; Graphics shader: no hardware-ID check; the local is addressed from a plain
; zero frame base ("off") and SP is simply set to 16.
define amdgpu_gs void @amdgpu_gs() #0 {
; CHECK-TRUE16-LABEL: amdgpu_gs:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT:    s_mov_b32 s32, 16
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, off scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT:    s_alloc_vgpr 0
; CHECK-TRUE16-NEXT:    s_endpgm
;
; CHECK-FAKE16-LABEL: amdgpu_gs:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT:    s_mov_b32 s32, 16
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, off scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT:    s_alloc_vgpr 0
; CHECK-FAKE16-NEXT:    s_endpgm
  %local = alloca i32, addrspace(5)
  store volatile i8 15, ptr addrspace(5) %local
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

; Non-entry gfx function: normal callee frame (s33 saved via v40 lanes,
; s32 bumped by 16) with no hardware-ID-based frame selection.
define amdgpu_gfx void @amdgpu_gfx() #0 {
; CHECK-TRUE16-LABEL: amdgpu_gfx:
; CHECK-TRUE16:       ; %bb.0:
; CHECK-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_expcnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_samplecnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_bvhcnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_kmcnt 0x0
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, s33
; CHECK-TRUE16-NEXT:    s_mov_b32 s33, s32
; CHECK-TRUE16-NEXT:    s_or_saveexec_b32 s1, -1
; CHECK-TRUE16-NEXT:    scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
; CHECK-TRUE16-NEXT:    s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT:    s_mov_b32 exec_lo, s1
; CHECK-TRUE16-NEXT:    v_writelane_b32 v40, s0, 2
; CHECK-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 15
; CHECK-TRUE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-TRUE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-TRUE16-NEXT:    s_add_co_i32 s32, s32, 16
; CHECK-TRUE16-NEXT:    v_writelane_b32 v40, s30, 0
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-TRUE16-NEXT:    s_wait_storecnt 0x0
; CHECK-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-TRUE16-NEXT:    v_writelane_b32 v40, s31, 1
; CHECK-TRUE16-NEXT:    s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; CHECK-TRUE16-NEXT:    v_readlane_b32 s31, v40, 1
; CHECK-TRUE16-NEXT:    v_readlane_b32 s30, v40, 0
; CHECK-TRUE16-NEXT:    s_mov_b32 s32, s33
; CHECK-TRUE16-NEXT:    v_readlane_b32 s0, v40, 2
; CHECK-TRUE16-NEXT:    s_or_saveexec_b32 s1, -1
; CHECK-TRUE16-NEXT:    scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
; CHECK-TRUE16-NEXT:    s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT:    s_mov_b32 exec_lo, s1
; CHECK-TRUE16-NEXT:    s_mov_b32 s33, s0
; CHECK-TRUE16-NEXT:    s_wait_loadcnt 0x0
; CHECK-TRUE16-NEXT:    s_wait_alu 0xfffe
; CHECK-TRUE16-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-FAKE16-LABEL: amdgpu_gfx:
; CHECK-FAKE16:       ; %bb.0:
; CHECK-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_expcnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_samplecnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_bvhcnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_kmcnt 0x0
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, s33
; CHECK-FAKE16-NEXT:    s_mov_b32 s33, s32
; CHECK-FAKE16-NEXT:    s_or_saveexec_b32 s1, -1
; CHECK-FAKE16-NEXT:    scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
; CHECK-FAKE16-NEXT:    s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT:    s_mov_b32 exec_lo, s1
; CHECK-FAKE16-NEXT:    v_writelane_b32 v40, s0, 2
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 15
; CHECK-FAKE16-NEXT:    s_mov_b32 s1, callee@abs32@hi
; CHECK-FAKE16-NEXT:    s_mov_b32 s0, callee@abs32@lo
; CHECK-FAKE16-NEXT:    s_add_co_i32 s32, s32, 16
; CHECK-FAKE16-NEXT:    v_writelane_b32 v40, s30, 0
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
; CHECK-FAKE16-NEXT:    s_wait_storecnt 0x0
; CHECK-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x47
; CHECK-FAKE16-NEXT:    v_writelane_b32 v40, s31, 1
; CHECK-FAKE16-NEXT:    s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; CHECK-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; CHECK-FAKE16-NEXT:    v_readlane_b32 s31, v40, 1
; CHECK-FAKE16-NEXT:    v_readlane_b32 s30, v40, 0
; CHECK-FAKE16-NEXT:    s_mov_b32 s32, s33
; CHECK-FAKE16-NEXT:    v_readlane_b32 s0, v40, 2
; CHECK-FAKE16-NEXT:    s_or_saveexec_b32 s1, -1
; CHECK-FAKE16-NEXT:    scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
; CHECK-FAKE16-NEXT:    s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT:    s_mov_b32 exec_lo, s1
; CHECK-FAKE16-NEXT:    s_mov_b32 s33, s0
; CHECK-FAKE16-NEXT:    s_wait_loadcnt 0x0
; CHECK-FAKE16-NEXT:    s_wait_alu 0xfffe
; CHECK-FAKE16-NEXT:    s_setpc_b64 s[30:31]
  %local = alloca i32, addrspace(5)
  store volatile i8 15, ptr addrspace(5) %local
  call amdgpu_gfx void @callee(i32 71)
  ret void
}

; Default calling convention, no stack use: no frame setup is emitted at all.
define void @default() #0 {
; CHECK-LABEL: default:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_expcnt 0x0
; CHECK-NEXT:    s_wait_samplecnt 0x0
; CHECK-NEXT:    s_wait_bvhcnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  ret void
}

declare amdgpu_gfx void @callee(i32) #0

; All functions enable dynamic VGPR allocation (block size 16); #1 and #2
; additionally pin the frame-pointer policy to "none" / "all".
attributes #0 = { nounwind "amdgpu-dynamic-vgpr-block-size"="16" }
attributes #1 = { nounwind "frame-pointer"="none" "amdgpu-dynamic-vgpr-block-size"="16" }
attributes #2 = { nounwind "frame-pointer"="all" "amdgpu-dynamic-vgpr-block-size"="16" }