; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s

; Checks how AGPRs are handled around calls when compiling for gfx90a and gfx908.
;
; The func_areg_* callees clobber specific AGPRs through inline asm. Each
; test_call_* kernel defines a <32 x float> value with an "=a" constraint,
; calls one of those callees (or the external func_unknown), and stores the
; value after the call, so the value is live across the call.

; Callee without inline asm; it only returns.
define void @func_empty() #0 {
; GCN-LABEL: func_empty:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  ret void
}

; Clobbers a3. Both targets share one set of expectations with no AGPR copies.
define void @func_areg_4() #0 {
; GCN-LABEL: func_areg_4:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use agpr3
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; use agpr3", "~{a3}" ()
  ret void
}

; Clobbers a31. Both targets share one set of expectations with no AGPR copies.
define void @func_areg_32() #0 {
; GCN-LABEL: func_areg_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use agpr31
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; use agpr31", "~{a31}" ()
  ret void
}

; Clobbers a32. The gfx90a output is expected to copy a32 into v0 before the
; asm and back afterwards; the gfx908 output is expected to have no such copies.
define void @func_areg_33() #0 {
; GFX90A-LABEL: func_areg_33:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_accvgpr_read_b32 v0, a32 ; Reload Reuse
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use agpr32
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_accvgpr_write_b32 a32, v0 ; Reload Reuse
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: func_areg_33:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    ;;#ASMSTART
; GFX908-NEXT:    ; use agpr32
; GFX908-NEXT:    ;;#ASMEND
; GFX908-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; use agpr32", "~{a32}" ()
  ret void
}

; Clobbers a63. Same split as func_areg_33: a63 is copied to v0 and back on
; gfx90a, and there are no copies on gfx908.
define void @func_areg_64() #0 {
; GFX90A-LABEL: func_areg_64:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_accvgpr_read_b32 v0, a63 ; Reload Reuse
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use agpr63
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_accvgpr_write_b32 a63, v0 ; Reload Reuse
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: func_areg_64:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    ;;#ASMSTART
; GFX908-NEXT:    ; use agpr63
; GFX908-NEXT:    ;;#ASMEND
; GFX908-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; use agpr63", "~{a63}" ()
  ret void
}

; Clobbers both a31 and a63. On gfx90a only a63 is expected to be copied to v0
; and back; a31 gets no copy on either target.
define void @func_areg_31_63() #0 {
; GFX90A-LABEL: func_areg_31_63:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_accvgpr_read_b32 v0, a63 ; Reload Reuse
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use agpr31, agpr63
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_accvgpr_write_b32 a63, v0 ; Reload Reuse
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: func_areg_31_63:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    ;;#ASMSTART
; GFX908-NEXT:    ; use agpr31, agpr63
; GFX908-NEXT:    ;;#ASMEND
; GFX908-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; use agpr31, agpr63", "~{a31},~{a63}" ()
  ret void
}

; External callee; no body is visible in this file.
declare void @func_unknown() #0

; Value live across a call to func_empty.
; gfx90a: the tuple is defined in a[0:31] and stored straight from AGPRs.
; gfx908: the tuple is read into v3-v30 and v32-v35 before the call and stored
; from those VGPRs afterwards.
define amdgpu_kernel void @test_call_empty() #0 {
; GFX90A-LABEL: test_call_empty:
; GFX90A:       ; %bb.0: ; %bb
; GFX90A-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT:    s_mov_b32 s22, -1
; GFX90A-NEXT:    s_mov_b32 s23, 0xe00000
; GFX90A-NEXT:    s_add_u32 s20, s20, s11
; GFX90A-NEXT:    s_addc_u32 s21, s21, 0
; GFX90A-NEXT:    s_mov_b32 s12, s8
; GFX90A-NEXT:    s_add_u32 s8, s4, 36
; GFX90A-NEXT:    s_mov_b32 s13, s9
; GFX90A-NEXT:    s_addc_u32 s9, s5, 0
; GFX90A-NEXT:    s_getpc_b64 s[4:5]
; GFX90A-NEXT:    s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
; GFX90A-NEXT:    s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
; GFX90A-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT:    s_mov_b32 s14, s10
; GFX90A-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
; GFX90A-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT:    s_mov_b32 s32, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def a[0:31]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[28:31], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[24:27], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[20:23], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[16:19], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[12:15], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[8:11], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[4:7], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[0:3], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_endpgm
;
; GFX908-LABEL: test_call_empty:
; GFX908:       ; %bb.0: ; %bb
; GFX908-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT:    s_mov_b32 s22, -1
; GFX908-NEXT:    s_mov_b32 s23, 0xe00000
; GFX908-NEXT:    s_add_u32 s20, s20, s11
; GFX908-NEXT:    s_addc_u32 s21, s21, 0
; GFX908-NEXT:    s_mov_b32 s12, s8
; GFX908-NEXT:    s_add_u32 s8, s4, 36
; GFX908-NEXT:    s_mov_b32 s13, s9
; GFX908-NEXT:    s_addc_u32 s9, s5, 0
; GFX908-NEXT:    s_getpc_b64 s[4:5]
; GFX908-NEXT:    s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4
; GFX908-NEXT:    s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12
; GFX908-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT:    s_mov_b32 s14, s10
; GFX908-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT:    v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT:    s_mov_b32 s32, 0
; GFX908-NEXT:    ;;#ASMSTART
; GFX908-NEXT:    ; def a[0:31]
; GFX908-NEXT:    ;;#ASMEND
; GFX908-NEXT:    v_accvgpr_read_b32 v6, a3
; GFX908-NEXT:    v_accvgpr_read_b32 v5, a2
; GFX908-NEXT:    v_accvgpr_read_b32 v4, a1
; GFX908-NEXT:    v_accvgpr_read_b32 v3, a0
; GFX908-NEXT:    v_accvgpr_read_b32 v10, a7
; GFX908-NEXT:    v_accvgpr_read_b32 v9, a6
; GFX908-NEXT:    v_accvgpr_read_b32 v8, a5
; GFX908-NEXT:    v_accvgpr_read_b32 v7, a4
; GFX908-NEXT:    v_accvgpr_read_b32 v14, a11
; GFX908-NEXT:    v_accvgpr_read_b32 v13, a10
; GFX908-NEXT:    v_accvgpr_read_b32 v12, a9
; GFX908-NEXT:    v_accvgpr_read_b32 v11, a8
; GFX908-NEXT:    v_accvgpr_read_b32 v18, a15
; GFX908-NEXT:    v_accvgpr_read_b32 v17, a14
; GFX908-NEXT:    v_accvgpr_read_b32 v16, a13
; GFX908-NEXT:    v_accvgpr_read_b32 v15, a12
; GFX908-NEXT:    v_accvgpr_read_b32 v22, a19
; GFX908-NEXT:    v_accvgpr_read_b32 v21, a18
; GFX908-NEXT:    v_accvgpr_read_b32 v20, a17
; GFX908-NEXT:    v_accvgpr_read_b32 v19, a16
; GFX908-NEXT:    v_accvgpr_read_b32 v26, a23
; GFX908-NEXT:    v_accvgpr_read_b32 v25, a22
; GFX908-NEXT:    v_accvgpr_read_b32 v24, a21
; GFX908-NEXT:    v_accvgpr_read_b32 v23, a20
; GFX908-NEXT:    v_accvgpr_read_b32 v30, a27
; GFX908-NEXT:    v_accvgpr_read_b32 v29, a26
; GFX908-NEXT:    v_accvgpr_read_b32 v28, a25
; GFX908-NEXT:    v_accvgpr_read_b32 v27, a24
; GFX908-NEXT:    v_accvgpr_read_b32 v35, a31
; GFX908-NEXT:    v_accvgpr_read_b32 v34, a30
; GFX908-NEXT:    v_accvgpr_read_b32 v33, a29
; GFX908-NEXT:    v_accvgpr_read_b32 v32, a28
; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_endpgm
bb:
  %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_empty()
  store volatile <32 x float> %reg, ptr addrspace(1) poison
  ret void
}

; Value live across a call to func_areg_4 (which clobbers a3).
; gfx90a: the tuple is expected in a[4:35], starting right above a3.
; gfx908: the tuple is defined in a[0:31] and read into VGPRs before the call.
define amdgpu_kernel void @test_call_areg4() #0 {
; GFX90A-LABEL: test_call_areg4:
; GFX90A:       ; %bb.0: ; %bb
; GFX90A-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT:    s_mov_b32 s22, -1
; GFX90A-NEXT:    s_mov_b32 s23, 0xe00000
; GFX90A-NEXT:    s_add_u32 s20, s20, s11
; GFX90A-NEXT:    s_addc_u32 s21, s21, 0
; GFX90A-NEXT:    s_mov_b32 s12, s8
; GFX90A-NEXT:    s_add_u32 s8, s4, 36
; GFX90A-NEXT:    s_mov_b32 s13, s9
; GFX90A-NEXT:    s_addc_u32 s9, s5, 0
; GFX90A-NEXT:    s_getpc_b64 s[4:5]
; GFX90A-NEXT:    s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
; GFX90A-NEXT:    s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
; GFX90A-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT:    s_mov_b32 s14, s10
; GFX90A-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
; GFX90A-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT:    s_mov_b32 s32, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def a[4:35]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[32:35], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[28:31], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[24:27], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[20:23], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[16:19], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[12:15], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[8:11], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[4:7], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_endpgm
;
; GFX908-LABEL: test_call_areg4:
; GFX908:       ; %bb.0: ; %bb
; GFX908-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT:    s_mov_b32 s22, -1
; GFX908-NEXT:    s_mov_b32 s23, 0xe00000
; GFX908-NEXT:    s_add_u32 s20, s20, s11
; GFX908-NEXT:    s_addc_u32 s21, s21, 0
; GFX908-NEXT:    s_mov_b32 s12, s8
; GFX908-NEXT:    s_add_u32 s8, s4, 36
; GFX908-NEXT:    s_mov_b32 s13, s9
; GFX908-NEXT:    s_addc_u32 s9, s5, 0
; GFX908-NEXT:    s_getpc_b64 s[4:5]
; GFX908-NEXT:    s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4
; GFX908-NEXT:    s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12
; GFX908-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT:    s_mov_b32 s14, s10
; GFX908-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT:    v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT:    s_mov_b32 s32, 0
; GFX908-NEXT:    ;;#ASMSTART
; GFX908-NEXT:    ; def a[0:31]
; GFX908-NEXT:    ;;#ASMEND
; GFX908-NEXT:    v_accvgpr_read_b32 v6, a3
; GFX908-NEXT:    v_accvgpr_read_b32 v5, a2
; GFX908-NEXT:    v_accvgpr_read_b32 v4, a1
; GFX908-NEXT:    v_accvgpr_read_b32 v3, a0
; GFX908-NEXT:    v_accvgpr_read_b32 v10, a7
; GFX908-NEXT:    v_accvgpr_read_b32 v9, a6
; GFX908-NEXT:    v_accvgpr_read_b32 v8, a5
; GFX908-NEXT:    v_accvgpr_read_b32 v7, a4
; GFX908-NEXT:    v_accvgpr_read_b32 v14, a11
; GFX908-NEXT:    v_accvgpr_read_b32 v13, a10
; GFX908-NEXT:    v_accvgpr_read_b32 v12, a9
; GFX908-NEXT:    v_accvgpr_read_b32 v11, a8
; GFX908-NEXT:    v_accvgpr_read_b32 v18, a15
; GFX908-NEXT:    v_accvgpr_read_b32 v17, a14
; GFX908-NEXT:    v_accvgpr_read_b32 v16, a13
; GFX908-NEXT:    v_accvgpr_read_b32 v15, a12
; GFX908-NEXT:    v_accvgpr_read_b32 v22, a19
; GFX908-NEXT:    v_accvgpr_read_b32 v21, a18
; GFX908-NEXT:    v_accvgpr_read_b32 v20, a17
; GFX908-NEXT:    v_accvgpr_read_b32 v19, a16
; GFX908-NEXT:    v_accvgpr_read_b32 v26, a23
; GFX908-NEXT:    v_accvgpr_read_b32 v25, a22
; GFX908-NEXT:    v_accvgpr_read_b32 v24, a21
; GFX908-NEXT:    v_accvgpr_read_b32 v23, a20
; GFX908-NEXT:    v_accvgpr_read_b32 v30, a27
; GFX908-NEXT:    v_accvgpr_read_b32 v29, a26
; GFX908-NEXT:    v_accvgpr_read_b32 v28, a25
; GFX908-NEXT:    v_accvgpr_read_b32 v27, a24
; GFX908-NEXT:    v_accvgpr_read_b32 v35, a31
; GFX908-NEXT:    v_accvgpr_read_b32 v34, a30
; GFX908-NEXT:    v_accvgpr_read_b32 v33, a29
; GFX908-NEXT:    v_accvgpr_read_b32 v32, a28
; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_endpgm
bb:
  %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_areg_4()
  store volatile <32 x float> %reg, ptr addrspace(1) poison
  ret void
}

; Value live across a call to func_areg_32 (which clobbers a31).
; gfx90a: the tuple is expected in a[32:63], entirely above a31.
; gfx908: the tuple is defined in a[0:31] and read into VGPRs before the call.
define amdgpu_kernel void @test_call_areg32() #0 {
; GFX90A-LABEL: test_call_areg32:
; GFX90A:       ; %bb.0: ; %bb
; GFX90A-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT:    s_mov_b32 s22, -1
; GFX90A-NEXT:    s_mov_b32 s23, 0xe00000
; GFX90A-NEXT:    s_add_u32 s20, s20, s11
; GFX90A-NEXT:    s_addc_u32 s21, s21, 0
; GFX90A-NEXT:    s_mov_b32 s12, s8
; GFX90A-NEXT:    s_add_u32 s8, s4, 36
; GFX90A-NEXT:    s_mov_b32 s13, s9
; GFX90A-NEXT:    s_addc_u32 s9, s5, 0
; GFX90A-NEXT:    s_getpc_b64 s[4:5]
; GFX90A-NEXT:    s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4
; GFX90A-NEXT:    s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12
; GFX90A-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT:    s_mov_b32 s14, s10
; GFX90A-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
; GFX90A-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT:    s_mov_b32 s32, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def a[32:63]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[60:63], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[56:59], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[52:55], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[48:51], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[44:47], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[40:43], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[36:39], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[32:35], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_endpgm
;
; GFX908-LABEL: test_call_areg32:
; GFX908:       ; %bb.0: ; %bb
; GFX908-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT:    s_mov_b32 s22, -1
; GFX908-NEXT:    s_mov_b32 s23, 0xe00000
; GFX908-NEXT:    s_add_u32 s20, s20, s11
; GFX908-NEXT:    s_addc_u32 s21, s21, 0
; GFX908-NEXT:    s_mov_b32 s12, s8
; GFX908-NEXT:    s_add_u32 s8, s4, 36
; GFX908-NEXT:    s_mov_b32 s13, s9
; GFX908-NEXT:    s_addc_u32 s9, s5, 0
; GFX908-NEXT:    s_getpc_b64 s[4:5]
; GFX908-NEXT:    s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4
; GFX908-NEXT:    s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12
; GFX908-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT:    s_mov_b32 s14, s10
; GFX908-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT:    v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT:    s_mov_b32 s32, 0
; GFX908-NEXT:    ;;#ASMSTART
; GFX908-NEXT:    ; def a[0:31]
; GFX908-NEXT:    ;;#ASMEND
; GFX908-NEXT:    v_accvgpr_read_b32 v6, a3
; GFX908-NEXT:    v_accvgpr_read_b32 v5, a2
; GFX908-NEXT:    v_accvgpr_read_b32 v4, a1
; GFX908-NEXT:    v_accvgpr_read_b32 v3, a0
; GFX908-NEXT:    v_accvgpr_read_b32 v10, a7
; GFX908-NEXT:    v_accvgpr_read_b32 v9, a6
; GFX908-NEXT:    v_accvgpr_read_b32 v8, a5
; GFX908-NEXT:    v_accvgpr_read_b32 v7, a4
; GFX908-NEXT:    v_accvgpr_read_b32 v14, a11
; GFX908-NEXT:    v_accvgpr_read_b32 v13, a10
; GFX908-NEXT:    v_accvgpr_read_b32 v12, a9
; GFX908-NEXT:    v_accvgpr_read_b32 v11, a8
; GFX908-NEXT:    v_accvgpr_read_b32 v18, a15
; GFX908-NEXT:    v_accvgpr_read_b32 v17, a14
; GFX908-NEXT:    v_accvgpr_read_b32 v16, a13
; GFX908-NEXT:    v_accvgpr_read_b32 v15, a12
; GFX908-NEXT:    v_accvgpr_read_b32 v22, a19
; GFX908-NEXT:    v_accvgpr_read_b32 v21, a18
; GFX908-NEXT:    v_accvgpr_read_b32 v20, a17
; GFX908-NEXT:    v_accvgpr_read_b32 v19, a16
; GFX908-NEXT:    v_accvgpr_read_b32 v26, a23
; GFX908-NEXT:    v_accvgpr_read_b32 v25, a22
; GFX908-NEXT:    v_accvgpr_read_b32 v24, a21
; GFX908-NEXT:    v_accvgpr_read_b32 v23, a20
; GFX908-NEXT:    v_accvgpr_read_b32 v30, a27
; GFX908-NEXT:    v_accvgpr_read_b32 v29, a26
; GFX908-NEXT:    v_accvgpr_read_b32 v28, a25
; GFX908-NEXT:    v_accvgpr_read_b32 v27, a24
; GFX908-NEXT:    v_accvgpr_read_b32 v35, a31
; GFX908-NEXT:    v_accvgpr_read_b32 v34, a30
; GFX908-NEXT:    v_accvgpr_read_b32 v33, a29
; GFX908-NEXT:    v_accvgpr_read_b32 v32, a28
; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_endpgm
bb:
  %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_areg_32()
  store volatile <32 x float> %reg, ptr addrspace(1) poison
  ret void
}

; Value live across a call to func_areg_64 (which clobbers a63).
; gfx90a: the tuple is expected in a[0:31].
; gfx908: the tuple is defined in a[0:31] and read into VGPRs before the call.
define amdgpu_kernel void @test_call_areg64() #0 {
; GFX90A-LABEL: test_call_areg64:
; GFX90A:       ; %bb.0: ; %bb
; GFX90A-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT:    s_mov_b32 s22, -1
; GFX90A-NEXT:    s_mov_b32 s23, 0xe00000
; GFX90A-NEXT:    s_add_u32 s20, s20, s11
; GFX90A-NEXT:    s_addc_u32 s21, s21, 0
; GFX90A-NEXT:    s_mov_b32 s12, s8
; GFX90A-NEXT:    s_add_u32 s8, s4, 36
; GFX90A-NEXT:    s_mov_b32 s13, s9
; GFX90A-NEXT:    s_addc_u32 s9, s5, 0
; GFX90A-NEXT:    s_getpc_b64 s[4:5]
; GFX90A-NEXT:    s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4
; GFX90A-NEXT:    s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12
; GFX90A-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT:    s_mov_b32 s14, s10
; GFX90A-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
; GFX90A-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT:    s_mov_b32 s32, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def a[0:31]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[28:31], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[24:27], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[20:23], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[16:19], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[12:15], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[8:11], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[4:7], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[0:3], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_endpgm
;
; GFX908-LABEL: test_call_areg64:
; GFX908:       ; %bb.0: ; %bb
; GFX908-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT:    s_mov_b32 s22, -1
; GFX908-NEXT:    s_mov_b32 s23, 0xe00000
; GFX908-NEXT:    s_add_u32 s20, s20, s11
; GFX908-NEXT:    s_addc_u32 s21, s21, 0
; GFX908-NEXT:    s_mov_b32 s12, s8
; GFX908-NEXT:    s_add_u32 s8, s4, 36
; GFX908-NEXT:    s_mov_b32 s13, s9
; GFX908-NEXT:    s_addc_u32 s9, s5, 0
; GFX908-NEXT:    s_getpc_b64 s[4:5]
; GFX908-NEXT:    s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4
; GFX908-NEXT:    s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12
; GFX908-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT:    s_mov_b32 s14, s10
; GFX908-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT:    v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT:    s_mov_b32 s32, 0
; GFX908-NEXT:    ;;#ASMSTART
; GFX908-NEXT:    ; def a[0:31]
; GFX908-NEXT:    ;;#ASMEND
; GFX908-NEXT:    v_accvgpr_read_b32 v6, a3
; GFX908-NEXT:    v_accvgpr_read_b32 v5, a2
; GFX908-NEXT:    v_accvgpr_read_b32 v4, a1
; GFX908-NEXT:    v_accvgpr_read_b32 v3, a0
; GFX908-NEXT:    v_accvgpr_read_b32 v10, a7
; GFX908-NEXT:    v_accvgpr_read_b32 v9, a6
; GFX908-NEXT:    v_accvgpr_read_b32 v8, a5
; GFX908-NEXT:    v_accvgpr_read_b32 v7, a4
; GFX908-NEXT:    v_accvgpr_read_b32 v14, a11
; GFX908-NEXT:    v_accvgpr_read_b32 v13, a10
; GFX908-NEXT:    v_accvgpr_read_b32 v12, a9
; GFX908-NEXT:    v_accvgpr_read_b32 v11, a8
; GFX908-NEXT:    v_accvgpr_read_b32 v18, a15
; GFX908-NEXT:    v_accvgpr_read_b32 v17, a14
; GFX908-NEXT:    v_accvgpr_read_b32 v16, a13
; GFX908-NEXT:    v_accvgpr_read_b32 v15, a12
; GFX908-NEXT:    v_accvgpr_read_b32 v22, a19
; GFX908-NEXT:    v_accvgpr_read_b32 v21, a18
; GFX908-NEXT:    v_accvgpr_read_b32 v20, a17
; GFX908-NEXT:    v_accvgpr_read_b32 v19, a16
; GFX908-NEXT:    v_accvgpr_read_b32 v26, a23
; GFX908-NEXT:    v_accvgpr_read_b32 v25, a22
; GFX908-NEXT:    v_accvgpr_read_b32 v24, a21
; GFX908-NEXT:    v_accvgpr_read_b32 v23, a20
; GFX908-NEXT:    v_accvgpr_read_b32 v30, a27
; GFX908-NEXT:    v_accvgpr_read_b32 v29, a26
; GFX908-NEXT:    v_accvgpr_read_b32 v28, a25
; GFX908-NEXT:    v_accvgpr_read_b32 v27, a24
; GFX908-NEXT:    v_accvgpr_read_b32 v35, a31
; GFX908-NEXT:    v_accvgpr_read_b32 v34, a30
; GFX908-NEXT:    v_accvgpr_read_b32 v33, a29
; GFX908-NEXT:    v_accvgpr_read_b32 v32, a28
; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_endpgm
bb:
  %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_areg_64()
  store volatile <32 x float> %reg, ptr addrspace(1) poison
  ret void
}

; Value live across a call to func_areg_31_63 (which clobbers a31 and a63).
; gfx90a: the tuple is expected in a[32:63].
; gfx908: the tuple is defined in a[0:31] and read into VGPRs before the call.
define amdgpu_kernel void @test_call_areg31_63() #0 {
; GFX90A-LABEL: test_call_areg31_63:
; GFX90A:       ; %bb.0: ; %bb
; GFX90A-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT:    s_mov_b32 s22, -1
; GFX90A-NEXT:    s_mov_b32 s23, 0xe00000
; GFX90A-NEXT:    s_add_u32 s20, s20, s11
; GFX90A-NEXT:    s_addc_u32 s21, s21, 0
; GFX90A-NEXT:    s_mov_b32 s12, s8
; GFX90A-NEXT:    s_add_u32 s8, s4, 36
; GFX90A-NEXT:    s_mov_b32 s13, s9
; GFX90A-NEXT:    s_addc_u32 s9, s5, 0
; GFX90A-NEXT:    s_getpc_b64 s[4:5]
; GFX90A-NEXT:    s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4
; GFX90A-NEXT:    s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12
; GFX90A-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT:    s_mov_b32 s14, s10
; GFX90A-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
; GFX90A-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX90A-NEXT:    s_mov_b32 s32, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def a[32:63]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[60:63], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[56:59], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[52:55], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[48:51], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[44:47], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[40:43], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[36:39], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[32:35], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_endpgm
;
; GFX908-LABEL: test_call_areg31_63:
; GFX908:       ; %bb.0: ; %bb
; GFX908-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
; GFX908-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
; GFX908-NEXT:    s_mov_b32 s22, -1
; GFX908-NEXT:    s_mov_b32 s23, 0xe00000
; GFX908-NEXT:    s_add_u32 s20, s20, s11
; GFX908-NEXT:    s_addc_u32 s21, s21, 0
; GFX908-NEXT:    s_mov_b32 s12, s8
; GFX908-NEXT:    s_add_u32 s8, s4, 36
; GFX908-NEXT:    s_mov_b32 s13, s9
; GFX908-NEXT:    s_addc_u32 s9, s5, 0
; GFX908-NEXT:    s_getpc_b64 s[4:5]
; GFX908-NEXT:    s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4
; GFX908-NEXT:    s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12
; GFX908-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT:    s_mov_b32 s14, s10
; GFX908-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT:    s_mov_b64 s[0:1], s[20:21]
; GFX908-NEXT:    v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT:    s_mov_b64 s[2:3], s[22:23]
; GFX908-NEXT:    s_mov_b32 s32, 0
; GFX908-NEXT:    ;;#ASMSTART
; GFX908-NEXT:    ; def a[0:31]
; GFX908-NEXT:    ;;#ASMEND
; GFX908-NEXT:    v_accvgpr_read_b32 v6, a3
; GFX908-NEXT:    v_accvgpr_read_b32 v5, a2
; GFX908-NEXT:    v_accvgpr_read_b32 v4, a1
; GFX908-NEXT:    v_accvgpr_read_b32 v3, a0
; GFX908-NEXT:    v_accvgpr_read_b32 v10, a7
; GFX908-NEXT:    v_accvgpr_read_b32 v9, a6
; GFX908-NEXT:    v_accvgpr_read_b32 v8, a5
; GFX908-NEXT:    v_accvgpr_read_b32 v7, a4
; GFX908-NEXT:    v_accvgpr_read_b32 v14, a11
; GFX908-NEXT:    v_accvgpr_read_b32 v13, a10
; GFX908-NEXT:    v_accvgpr_read_b32 v12, a9
; GFX908-NEXT:    v_accvgpr_read_b32 v11, a8
; GFX908-NEXT:    v_accvgpr_read_b32 v18, a15
; GFX908-NEXT:    v_accvgpr_read_b32 v17, a14
; GFX908-NEXT:    v_accvgpr_read_b32 v16, a13
; GFX908-NEXT:    v_accvgpr_read_b32 v15, a12
; GFX908-NEXT:    v_accvgpr_read_b32 v22, a19
; GFX908-NEXT:    v_accvgpr_read_b32 v21, a18
; GFX908-NEXT:    v_accvgpr_read_b32 v20, a17
; GFX908-NEXT:    v_accvgpr_read_b32 v19, a16
; GFX908-NEXT:    v_accvgpr_read_b32 v26, a23
; GFX908-NEXT:    v_accvgpr_read_b32 v25, a22
; GFX908-NEXT:    v_accvgpr_read_b32 v24, a21
; GFX908-NEXT:    v_accvgpr_read_b32 v23, a20
; GFX908-NEXT:    v_accvgpr_read_b32 v30, a27
; GFX908-NEXT:    v_accvgpr_read_b32 v29, a26
; GFX908-NEXT:    v_accvgpr_read_b32 v28, a25
; GFX908-NEXT:    v_accvgpr_read_b32 v27, a24
; GFX908-NEXT:    v_accvgpr_read_b32 v35, a31
; GFX908-NEXT:    v_accvgpr_read_b32 v34, a30
; GFX908-NEXT:    v_accvgpr_read_b32 v33, a29
; GFX908-NEXT:    v_accvgpr_read_b32 v32, a28
; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[32:35], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[27:30], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[23:26], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[19:22], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_endpgm
bb:
  %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_areg_31_63()
  store volatile <32 x float> %reg, ptr addrspace(1) poison
  ret void
}

; Value live across a call to the external func_unknown.
; gfx90a: the tuple is expected in a[32:63]; the scratch descriptor is set up
; in s[36:39] rather than s[20:23].
; gfx908: the tuple is read into v40-v47, v56-v63, v72-v79 and v88-v95 before
; the call and stored from those VGPRs afterwards.
define amdgpu_kernel void @test_call_unknown() #0 {
; GFX90A-LABEL: test_call_unknown:
; GFX90A:       ; %bb.0: ; %bb
; GFX90A-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GFX90A-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GFX90A-NEXT:    s_mov_b32 s38, -1
; GFX90A-NEXT:    s_mov_b32 s39, 0xe00000
; GFX90A-NEXT:    s_add_u32 s36, s36, s11
; GFX90A-NEXT:    s_addc_u32 s37, s37, 0
; GFX90A-NEXT:    s_mov_b32 s12, s8
; GFX90A-NEXT:    s_add_u32 s8, s4, 36
; GFX90A-NEXT:    s_mov_b32 s13, s9
; GFX90A-NEXT:    s_addc_u32 s9, s5, 0
; GFX90A-NEXT:    s_getpc_b64 s[4:5]
; GFX90A-NEXT:    s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4
; GFX90A-NEXT:    s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12
; GFX90A-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX90A-NEXT:    s_mov_b32 s14, s10
; GFX90A-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX90A-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX90A-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX90A-NEXT:    s_mov_b64 s[0:1], s[36:37]
; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
; GFX90A-NEXT:    s_mov_b64 s[2:3], s[38:39]
; GFX90A-NEXT:    s_mov_b32 s32, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def a[32:63]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[60:63], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[56:59], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[52:55], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[48:51], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[44:47], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[40:43], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[36:39], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx4 v[0:1], a[32:35], off
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_endpgm
;
; GFX908-LABEL: test_call_unknown:
; GFX908:       ; %bb.0: ; %bb
; GFX908-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GFX908-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GFX908-NEXT:    s_mov_b32 s38, -1
; GFX908-NEXT:    s_mov_b32 s39, 0xe00000
; GFX908-NEXT:    s_add_u32 s36, s36, s11
; GFX908-NEXT:    s_addc_u32 s37, s37, 0
; GFX908-NEXT:    s_mov_b32 s12, s8
; GFX908-NEXT:    s_add_u32 s8, s4, 36
; GFX908-NEXT:    s_mov_b32 s13, s9
; GFX908-NEXT:    s_addc_u32 s9, s5, 0
; GFX908-NEXT:    s_getpc_b64 s[4:5]
; GFX908-NEXT:    s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4
; GFX908-NEXT:    s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12
; GFX908-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; GFX908-NEXT:    s_mov_b32 s14, s10
; GFX908-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX908-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; GFX908-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; GFX908-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX908-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX908-NEXT:    s_mov_b64 s[0:1], s[36:37]
; GFX908-NEXT:    v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT:    s_mov_b64 s[2:3], s[38:39]
; GFX908-NEXT:    s_mov_b32 s32, 0
; GFX908-NEXT:    ;;#ASMSTART
; GFX908-NEXT:    ; def a[0:31]
; GFX908-NEXT:    ;;#ASMEND
; GFX908-NEXT:    v_accvgpr_read_b32 v43, a3
; GFX908-NEXT:    v_accvgpr_read_b32 v42, a2
; GFX908-NEXT:    v_accvgpr_read_b32 v41, a1
; GFX908-NEXT:    v_accvgpr_read_b32 v40, a0
; GFX908-NEXT:    v_accvgpr_read_b32 v47, a7
; GFX908-NEXT:    v_accvgpr_read_b32 v46, a6
; GFX908-NEXT:    v_accvgpr_read_b32 v45, a5
; GFX908-NEXT:    v_accvgpr_read_b32 v44, a4
; GFX908-NEXT:    v_accvgpr_read_b32 v59, a11
; GFX908-NEXT:    v_accvgpr_read_b32 v58, a10
; GFX908-NEXT:    v_accvgpr_read_b32 v57, a9
; GFX908-NEXT:    v_accvgpr_read_b32 v56, a8
; GFX908-NEXT:    v_accvgpr_read_b32 v63, a15
; GFX908-NEXT:    v_accvgpr_read_b32 v62, a14
; GFX908-NEXT:    v_accvgpr_read_b32 v61, a13
; GFX908-NEXT:    v_accvgpr_read_b32 v60, a12
; GFX908-NEXT:    v_accvgpr_read_b32 v75, a19
; GFX908-NEXT:    v_accvgpr_read_b32 v74, a18
; GFX908-NEXT:    v_accvgpr_read_b32 v73, a17
; GFX908-NEXT:    v_accvgpr_read_b32 v72, a16
; GFX908-NEXT:    v_accvgpr_read_b32 v79, a23
; GFX908-NEXT:    v_accvgpr_read_b32 v78, a22
; GFX908-NEXT:    v_accvgpr_read_b32 v77, a21
; GFX908-NEXT:    v_accvgpr_read_b32 v76, a20
; GFX908-NEXT:    v_accvgpr_read_b32 v91, a27
; GFX908-NEXT:    v_accvgpr_read_b32 v90, a26
; GFX908-NEXT:    v_accvgpr_read_b32 v89, a25
; GFX908-NEXT:    v_accvgpr_read_b32 v88, a24
; GFX908-NEXT:    v_accvgpr_read_b32 v95, a31
; GFX908-NEXT:    v_accvgpr_read_b32 v94, a30
; GFX908-NEXT:    v_accvgpr_read_b32 v93, a29
; GFX908-NEXT:    v_accvgpr_read_b32 v92, a28
; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[92:95], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[88:91], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[76:79], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[72:75], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[60:63], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[56:59], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[44:47], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    global_store_dwordx4 v[0:1], v[40:43], off
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_endpgm
bb:
  %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
  call void @func_unknown()
  store volatile <32 x float> %reg, ptr addrspace(1) poison
  ret void
}

attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }