; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck --check-prefix=DAGISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck --check-prefix=GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=DAGISEL64 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GISEL64 %s

; The RUN lines above exercise both instruction selectors (-global-isel=0 and
; -global-isel=1), each without and with -mattr=+wavefrontsize64, which gives
; the four check prefixes used throughout this file.

; Make sure the i1 %active is passed through EXEC.
; The EXEC mask should be set to -1 for the duration of the function
; and restored to its original value in the epilogue.
; We will also need to restore the inactive lanes for any allocated VGPRs.
define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: basic_test:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: basic_test:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_store_b32 off, v0, s32
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_load_b32 v0, off, s32
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: basic_test:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; DAGISEL64-NEXT: v_cndmask_b32_e32 v1, 3, v1, vcc
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, vcc
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: basic_test:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_store_b32 off, v0, s32
; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; GISEL64-NEXT: v_cndmask_b32_e32 v1, 3, v1, vcc
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL64-NEXT: s_xor_b64 exec, vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_load_b32 v0, off, s32
; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
  ; Both operands of the DPP update are selected on %active: %a / %b when it
  ; is true, the constants 5 / 3 otherwise.
  %x = select i1 %active, i32 %a, i32 5
  %y = select i1 %active, i32 %b, i32 3
  %ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %x, i32 %y, i32 1, i32 1, i32 1, i1 false)
  ret i32 %ret
}

; Make sure we don't crash if there's only one use for %active.
define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: single_use_of_active:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: v_cndmask_b32_e32 v1, 17, v1, vcc_lo
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: single_use_of_active:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_store_b32 off, v0, s32
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_cndmask_b32_e32 v1, 17, v1, vcc_lo
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_load_b32 v0, off, s32
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: single_use_of_active:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: v_cndmask_b32_e32 v1, 17, v1, vcc
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1
; DAGISEL64-NEXT: s_clause 0x1
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; DAGISEL64-NEXT: s_mov_b64 exec, vcc
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: single_use_of_active:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_store_b32 off, v0, s32
; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_cndmask_b32_e32 v1, 17, v1, vcc
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL64-NEXT: s_xor_b64 exec, vcc, -1
; GISEL64-NEXT: s_clause 0x1
; GISEL64-NEXT: scratch_load_b32 v0, off, s32
; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
  ; %active feeds exactly one select here; %a is passed to the DPP update
  ; unmodified.
  %y = select i1 %active, i32 %b, i32 17
  %ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a, i32 %y, i32 1, i32 1, i32 1, i1 false)
  ret i32 %ret
}

; Make sure we don't crash if %active is not used at all.
define amdgpu_gfx_whole_wave i32 @unused_active(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: unused_active:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: v_mov_b32_e32 v0, 14
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; DAGISEL-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: unused_active:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 s0, -1
; GISEL-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: v_mov_b32_e32 v0, 14
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_xor_b32 exec_lo, s0, -1
; GISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b32 exec_lo, s0
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: unused_active:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: v_mov_b32_e32 v0, 14
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: s_xor_b64 exec, s[0:1], -1
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: unused_active:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GISEL64-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: v_mov_b32_e32 v0, 14
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: s_xor_b64 exec, s[0:1], -1
; GISEL64-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GISEL64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_setpc_b64 s[30:31]
  ; %active, %a and %b are all ignored; the function only returns the
  ; constant 14.
  ret i32 14
}

; For any used VGPRs (including those used for SGPR spills), we need to restore the inactive lanes.
; For CSR VGPRs, we need to restore all lanes.
define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: csr:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-NEXT: s_wait_expcnt 0x0
; DAGISEL-NEXT: s_wait_samplecnt 0x0
; DAGISEL-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x3
; DAGISEL-NEXT: scratch_store_b32 off, v2, s32
; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4
; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8
; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16
; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
; DAGISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill
; DAGISEL-NEXT: ;;#ASMSTART
; DAGISEL-NEXT: ; clobber CSR
; DAGISEL-NEXT: ;;#ASMEND
; DAGISEL-NEXT: v_writelane_b32 v2, s20, 0
; DAGISEL-NEXT: ;;#ASMSTART
; DAGISEL-NEXT: ; clobber non-CSR
; DAGISEL-NEXT: ;;#ASMEND
; DAGISEL-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
; DAGISEL-NEXT: v_readlane_b32 s20, v2, 0
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; DAGISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; DAGISEL-NEXT: s_clause 0x3
; DAGISEL-NEXT: scratch_load_b32 v2, off, s32
; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 offset:4
; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:8
; DAGISEL-NEXT: scratch_load_b32 v49, off, s32 offset:16
; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; DAGISEL-NEXT: s_wait_loadcnt 0x0
; DAGISEL-NEXT: s_wait_alu 0xf1ff
; DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csr:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-NEXT: s_wait_expcnt 0x0
; GISEL-NEXT: s_wait_samplecnt 0x0
; GISEL-NEXT: s_wait_bvhcnt 0x0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1
; GISEL-NEXT: s_clause 0x3
; GISEL-NEXT: scratch_store_b32 off, v2, s32
; GISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4
; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8
; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16
; GISEL-NEXT: s_mov_b32 exec_lo, -1
; GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill
; GISEL-NEXT: ;;#ASMSTART
; GISEL-NEXT: ; clobber CSR
; GISEL-NEXT: ;;#ASMEND
; GISEL-NEXT: v_writelane_b32 v2, s20, 0
; GISEL-NEXT: ;;#ASMSTART
; GISEL-NEXT: ; clobber non-CSR
; GISEL-NEXT: ;;#ASMEND
; GISEL-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
; GISEL-NEXT: v_readlane_b32 s20, v2, 0
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1
; GISEL-NEXT: s_clause 0x3
; GISEL-NEXT: scratch_load_b32 v2, off, s32
; GISEL-NEXT: scratch_load_b32 v0, off, s32 offset:4
; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:8
; GISEL-NEXT: scratch_load_b32 v49, off, s32 offset:16
; GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo
; GISEL-NEXT: s_wait_loadcnt 0x0
; GISEL-NEXT: s_wait_alu 0xf1ff
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL64-LABEL: csr:
; DAGISEL64: ; %bb.0:
; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL64-NEXT: s_wait_expcnt 0x0
; DAGISEL64-NEXT: s_wait_samplecnt 0x0
; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
; DAGISEL64-NEXT: s_wait_kmcnt 0x0
; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; DAGISEL64-NEXT: s_clause 0x3
; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32
; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 offset:4
; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:8
; DAGISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16
; DAGISEL64-NEXT: s_mov_b64 exec, -1
; DAGISEL64-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill
; DAGISEL64-NEXT: ;;#ASMSTART
; DAGISEL64-NEXT: ; clobber CSR
; DAGISEL64-NEXT: ;;#ASMEND
; DAGISEL64-NEXT: v_writelane_b32 v2, s20, 0
; DAGISEL64-NEXT: ;;#ASMSTART
; DAGISEL64-NEXT: ; clobber non-CSR
; DAGISEL64-NEXT: ;;#ASMEND
; DAGISEL64-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
; DAGISEL64-NEXT: s_wait_alu 0xfffe
; DAGISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; DAGISEL64-NEXT: v_cndmask_b32_e32 v1, 3, v1, vcc
; DAGISEL64-NEXT: v_readlane_b32 s20, v2, 0
; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; DAGISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1
; DAGISEL64-NEXT: s_clause 0x3
; DAGISEL64-NEXT: scratch_load_b32 v2, off, s32
; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 offset:4
; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:8
; DAGISEL64-NEXT: scratch_load_b32 v49, off, s32 offset:16
; DAGISEL64-NEXT: s_mov_b64 exec, vcc
; DAGISEL64-NEXT: s_wait_loadcnt 0x0
; DAGISEL64-NEXT: s_wait_alu 0xf1ff
; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
;
; GISEL64-LABEL: csr:
; GISEL64: ; %bb.0:
; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL64-NEXT: s_wait_expcnt 0x0
; GISEL64-NEXT: s_wait_samplecnt 0x0
; GISEL64-NEXT: s_wait_bvhcnt 0x0
; GISEL64-NEXT: s_wait_kmcnt 0x0
; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1
; GISEL64-NEXT: s_clause 0x3
; GISEL64-NEXT: scratch_store_b32 off, v2, s32
; GISEL64-NEXT: scratch_store_b32 off, v0, s32 offset:4
; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:8
; GISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16
; GISEL64-NEXT: s_mov_b64 exec, -1
; GISEL64-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill
; GISEL64-NEXT: ;;#ASMSTART
; GISEL64-NEXT: ; clobber CSR
; GISEL64-NEXT: ;;#ASMEND
; GISEL64-NEXT: v_writelane_b32 v2, s20, 0
; GISEL64-NEXT: ;;#ASMSTART
; GISEL64-NEXT: ; clobber non-CSR
; GISEL64-NEXT: ;;#ASMEND
; GISEL64-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
; GISEL64-NEXT: s_wait_alu 0xfffe
; GISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc
; GISEL64-NEXT: v_cndmask_b32_e32 v1, 3, v1, vcc
; GISEL64-NEXT: v_readlane_b32 s20, v2, 0
; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL64-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GISEL64-NEXT: s_xor_b64 exec, vcc, -1
; GISEL64-NEXT: s_clause 0x3
; GISEL64-NEXT: scratch_load_b32 v2, off, s32
; GISEL64-NEXT: scratch_load_b32 v0, off, s32 offset:4
; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:8
; GISEL64-NEXT: scratch_load_b32 v49, off, s32 offset:16
; GISEL64-NEXT: s_mov_b64 exec, vcc
; GISEL64-NEXT: s_wait_loadcnt 0x0
; GISEL64-NEXT: s_wait_alu 0xf1ff
; GISEL64-NEXT: s_setpc_b64 s[30:31]
  ; Same select + DPP pattern as @basic_test, with two inline asm statements
  ; in between that have no operands, only clobber lists (v40 + s48, then
  ; v49 + s20).
  %x = select i1 %active, i32 %a, i32 5
  %y = select i1 %active, i32 %b, i32 3
  ; NOTE(review): the asm strings label s48 as CSR and s20 as non-CSR, but the
  ; checks above spill and reload s20 (lane 0 of v2) and never touch s48, so
  ; the labels seem to hold only for the VGPRs (v40 / v49). Confirm against
  ; the calling convention before relying on them.
  call void asm sideeffect "; clobber CSR", "~{v40},~{s48}"()
  call void asm sideeffect "; clobber non-CSR", "~{v49},~{s20}"()
  %ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %x, i32 %y, i32 1, i32 1, i32 1, i1 false)
  ret i32 %ret
}

; Save and restore all lanes of v40.
define amdgpu_gfx_whole_wave void @csr_vgpr_only(i1 %active, i32 %a, i32 %b) { ; DAGISEL-LABEL: csr_vgpr_only: ; DAGISEL: ; %bb.0: ; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL-NEXT: s_wait_expcnt 0x0 ; DAGISEL-NEXT: s_wait_samplecnt 0x0 ; DAGISEL-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL-NEXT: s_wait_kmcnt 0x0 ; DAGISEL-NEXT: s_or_saveexec_b32 s0, -1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; DAGISEL-NEXT: ;;#ASMSTART ; DAGISEL-NEXT: ; clobber CSR VGPR ; DAGISEL-NEXT: ;;#ASMEND ; DAGISEL-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_mov_b32 exec_lo, s0 ; DAGISEL-NEXT: s_wait_loadcnt 0x0 ; DAGISEL-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: csr_vgpr_only: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL-NEXT: s_wait_expcnt 0x0 ; GISEL-NEXT: s_wait_samplecnt 0x0 ; GISEL-NEXT: s_wait_bvhcnt 0x0 ; GISEL-NEXT: s_wait_kmcnt 0x0 ; GISEL-NEXT: s_or_saveexec_b32 s0, -1 ; GISEL-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; GISEL-NEXT: ;;#ASMSTART ; GISEL-NEXT: ; clobber CSR VGPR ; GISEL-NEXT: ;;#ASMEND ; GISEL-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_mov_b32 exec_lo, s0 ; GISEL-NEXT: s_wait_loadcnt 0x0 ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; DAGISEL64-LABEL: csr_vgpr_only: ; DAGISEL64: ; %bb.0: ; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL64-NEXT: s_wait_expcnt 0x0 ; DAGISEL64-NEXT: s_wait_samplecnt 0x0 ; DAGISEL64-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL64-NEXT: s_wait_kmcnt 0x0 ; DAGISEL64-NEXT: s_or_saveexec_b64 s[0:1], -1 ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; DAGISEL64-NEXT: ;;#ASMSTART ; DAGISEL64-NEXT: ; clobber CSR VGPR ; DAGISEL64-NEXT: ;;#ASMEND ; DAGISEL64-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1] ; DAGISEL64-NEXT: s_wait_loadcnt 0x0 ; 
DAGISEL64-NEXT: s_setpc_b64 s[30:31] ; ; GISEL64-LABEL: csr_vgpr_only: ; GISEL64: ; %bb.0: ; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL64-NEXT: s_wait_expcnt 0x0 ; GISEL64-NEXT: s_wait_samplecnt 0x0 ; GISEL64-NEXT: s_wait_bvhcnt 0x0 ; GISEL64-NEXT: s_wait_kmcnt 0x0 ; GISEL64-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GISEL64-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; GISEL64-NEXT: ;;#ASMSTART ; GISEL64-NEXT: ; clobber CSR VGPR ; GISEL64-NEXT: ;;#ASMEND ; GISEL64-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: s_mov_b64 exec, s[0:1] ; GISEL64-NEXT: s_wait_loadcnt 0x0 ; GISEL64-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "; clobber CSR VGPR", "~{v40}"() ret void } define amdgpu_gfx_whole_wave void @sgpr_spill_only(i1 %active, i32 %a, i32 %b) { ; DAGISEL-LABEL: sgpr_spill_only: ; DAGISEL: ; %bb.0: ; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL-NEXT: s_wait_expcnt 0x0 ; DAGISEL-NEXT: s_wait_samplecnt 0x0 ; DAGISEL-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL-NEXT: s_wait_kmcnt 0x0 ; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: v_writelane_b32 v0, s68, 0 ; DAGISEL-NEXT: ;;#ASMSTART ; DAGISEL-NEXT: ; clobber CSR SGPR ; DAGISEL-NEXT: ;;#ASMEND ; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; DAGISEL-NEXT: v_readlane_b32 s68, v0, 0 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1 ; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; DAGISEL-NEXT: s_mov_b32 exec_lo, s0 ; DAGISEL-NEXT: s_wait_loadcnt 0x0 ; DAGISEL-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: sgpr_spill_only: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL-NEXT: s_wait_expcnt 0x0 ; GISEL-NEXT: s_wait_samplecnt 0x0 ; GISEL-NEXT: s_wait_bvhcnt 0x0 ; GISEL-NEXT: s_wait_kmcnt 0x0 ; GISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; GISEL-NEXT: scratch_store_b32 off, v0, 
s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: v_writelane_b32 v0, s68, 0 ; GISEL-NEXT: ;;#ASMSTART ; GISEL-NEXT: ; clobber CSR SGPR ; GISEL-NEXT: ;;#ASMEND ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-NEXT: v_readlane_b32 s68, v0, 0 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_xor_b32 exec_lo, s0, -1 ; GISEL-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b32 exec_lo, s0 ; GISEL-NEXT: s_wait_loadcnt 0x0 ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; DAGISEL64-LABEL: sgpr_spill_only: ; DAGISEL64: ; %bb.0: ; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL64-NEXT: s_wait_expcnt 0x0 ; DAGISEL64-NEXT: s_wait_samplecnt 0x0 ; DAGISEL64-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL64-NEXT: s_wait_kmcnt 0x0 ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: v_writelane_b32 v0, s68, 0 ; DAGISEL64-NEXT: ;;#ASMSTART ; DAGISEL64-NEXT: ; clobber CSR SGPR ; DAGISEL64-NEXT: ;;#ASMEND ; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) ; DAGISEL64-NEXT: v_readlane_b32 s68, v0, 0 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_xor_b64 exec, s[0:1], -1 ; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1] ; DAGISEL64-NEXT: s_wait_loadcnt 0x0 ; DAGISEL64-NEXT: s_setpc_b64 s[30:31] ; ; GISEL64-LABEL: sgpr_spill_only: ; GISEL64: ; %bb.0: ; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL64-NEXT: s_wait_expcnt 0x0 ; GISEL64-NEXT: s_wait_samplecnt 0x0 ; GISEL64-NEXT: s_wait_bvhcnt 0x0 ; GISEL64-NEXT: s_wait_kmcnt 0x0 ; GISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: v_writelane_b32 v0, s68, 0 ; GISEL64-NEXT: ;;#ASMSTART ; GISEL64-NEXT: ; clobber CSR SGPR ; GISEL64-NEXT: ;;#ASMEND ; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) ; 
GISEL64-NEXT: v_readlane_b32 s68, v0, 0 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: s_xor_b64 exec, s[0:1], -1 ; GISEL64-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload ; GISEL64-NEXT: s_mov_b64 exec, s[0:1] ; GISEL64-NEXT: s_wait_loadcnt 0x0 ; GISEL64-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "; clobber CSR SGPR", "~{s68}"() ret void } define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; DAGISEL-LABEL: multiple_blocks: ; DAGISEL: ; %bb.0: ; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL-NEXT: s_wait_expcnt 0x0 ; DAGISEL-NEXT: s_wait_samplecnt 0x0 ; DAGISEL-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL-NEXT: s_wait_kmcnt 0x0 ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; DAGISEL-NEXT: s_mov_b32 s1, exec_lo ; DAGISEL-NEXT: v_cmpx_eq_u32_e64 v0, v1 ; DAGISEL-NEXT: ; %bb.1: ; %if.then ; DAGISEL-NEXT: v_add_nc_u32_e32 v1, v0, v1 ; DAGISEL-NEXT: ; %bb.2: ; %if.end ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; DAGISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 ; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo ; DAGISEL-NEXT: s_wait_loadcnt 0x0 ; DAGISEL-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: multiple_blocks: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL-NEXT: s_wait_expcnt 0x0 ; GISEL-NEXT: s_wait_samplecnt 0x0 ; GISEL-NEXT: s_wait_bvhcnt 0x0 ; GISEL-NEXT: s_wait_kmcnt 0x0 ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 ; GISEL-NEXT: scratch_store_b32 off, v1, s32 
offset:4 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-NEXT: s_mov_b32 s1, exec_lo ; GISEL-NEXT: v_cmpx_eq_u32_e64 v0, v1 ; GISEL-NEXT: ; %bb.1: ; %if.then ; GISEL-NEXT: v_add_nc_u32_e32 v1, v0, v1 ; GISEL-NEXT: ; %bb.2: ; %if.end ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_load_b32 v0, off, s32 ; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo ; GISEL-NEXT: s_wait_loadcnt 0x0 ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; DAGISEL64-LABEL: multiple_blocks: ; DAGISEL64: ; %bb.0: ; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL64-NEXT: s_wait_expcnt 0x0 ; DAGISEL64-NEXT: s_wait_samplecnt 0x0 ; DAGISEL64-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL64-NEXT: s_wait_kmcnt 0x0 ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x1 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; DAGISEL64-NEXT: s_mov_b64 s[2:3], exec ; DAGISEL64-NEXT: v_cmpx_eq_u32_e64 v0, v1 ; DAGISEL64-NEXT: ; %bb.1: ; %if.then ; DAGISEL64-NEXT: v_add_nc_u32_e32 v1, v0, v1 ; DAGISEL64-NEXT: ; %bb.2: ; %if.end ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_or_b64 exec, exec, s[2:3] ; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) ; DAGISEL64-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1 ; DAGISEL64-NEXT: s_clause 0x1 ; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 ; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; DAGISEL64-NEXT: s_mov_b64 exec, vcc ; DAGISEL64-NEXT: s_wait_loadcnt 0x0 ; DAGISEL64-NEXT: s_setpc_b64 s[30:31] ; ; GISEL64-LABEL: multiple_blocks: ; GISEL64: ; %bb.0: ; GISEL64-NEXT: 
s_wait_loadcnt_dscnt 0x0 ; GISEL64-NEXT: s_wait_expcnt 0x0 ; GISEL64-NEXT: s_wait_samplecnt 0x0 ; GISEL64-NEXT: s_wait_bvhcnt 0x0 ; GISEL64-NEXT: s_wait_kmcnt 0x0 ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x1 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL64-NEXT: s_mov_b64 s[2:3], exec ; GISEL64-NEXT: v_cmpx_eq_u32_e64 v0, v1 ; GISEL64-NEXT: ; %bb.1: ; %if.then ; GISEL64-NEXT: v_add_nc_u32_e32 v1, v0, v1 ; GISEL64-NEXT: ; %bb.2: ; %if.end ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: s_or_b64 exec, exec, s[2:3] ; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL64-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GISEL64-NEXT: s_xor_b64 exec, vcc, -1 ; GISEL64-NEXT: s_clause 0x1 ; GISEL64-NEXT: scratch_load_b32 v0, off, s32 ; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; GISEL64-NEXT: s_mov_b64 exec, vcc ; GISEL64-NEXT: s_wait_loadcnt 0x0 ; GISEL64-NEXT: s_setpc_b64 s[30:31] %c = icmp eq i32 %a, %b br i1 %c, label %if.then, label %if.end if.then: ; preds = %0 %d = add i32 %a, %b br label %if.end if.end: %f = phi i32 [ %d, %if.then ], [ %b, %0 ] %e = select i1 %active, i32 %a, i32 %f ret i32 %e } define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; DAGISEL-LABEL: ret_64: ; DAGISEL: ; %bb.0: ; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL-NEXT: s_wait_expcnt 0x0 ; DAGISEL-NEXT: s_wait_samplecnt 0x0 ; DAGISEL-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL-NEXT: s_wait_kmcnt 0x0 ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x3 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: v_dual_cndmask_b32 v1, 0, v1 
:: v_dual_cndmask_b32 v0, 5, v0 ; DAGISEL-NEXT: v_dual_cndmask_b32 v2, 3, v2 :: v_dual_cndmask_b32 v3, 0, v3 ; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; DAGISEL-NEXT: v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; DAGISEL-NEXT: v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; DAGISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x3 ; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 ; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8 ; DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12 ; DAGISEL-NEXT: s_mov_b32 exec_lo, vcc_lo ; DAGISEL-NEXT: s_wait_loadcnt 0x0 ; DAGISEL-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: ret_64: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL-NEXT: s_wait_expcnt 0x0 ; GISEL-NEXT: s_wait_samplecnt 0x0 ; GISEL-NEXT: s_wait_bvhcnt 0x0 ; GISEL-NEXT: s_wait_kmcnt 0x0 ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x3 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 0, v1 ; GISEL-NEXT: v_dual_cndmask_b32 v2, 3, v2 :: v_dual_cndmask_b32 v3, 0, v3 ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GISEL-NEXT: v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; GISEL-NEXT: v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; GISEL-NEXT: s_xor_b32 exec_lo, vcc_lo, -1 ; GISEL-NEXT: s_clause 0x3 ; GISEL-NEXT: scratch_load_b32 v0, off, s32 ; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; GISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8 ; GISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12 ; 
GISEL-NEXT: s_mov_b32 exec_lo, vcc_lo ; GISEL-NEXT: s_wait_loadcnt 0x0 ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; DAGISEL64-LABEL: ret_64: ; DAGISEL64: ; %bb.0: ; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL64-NEXT: s_wait_expcnt 0x0 ; DAGISEL64-NEXT: s_wait_samplecnt 0x0 ; DAGISEL64-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL64-NEXT: s_wait_kmcnt 0x0 ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x3 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; DAGISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc ; DAGISEL64-NEXT: v_cndmask_b32_e32 v2, 3, v2, vcc ; DAGISEL64-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; DAGISEL64-NEXT: v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; DAGISEL64-NEXT: v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; DAGISEL64-NEXT: s_xor_b64 exec, vcc, -1 ; DAGISEL64-NEXT: s_clause 0x3 ; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 ; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; DAGISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8 ; DAGISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12 ; DAGISEL64-NEXT: s_mov_b64 exec, vcc ; DAGISEL64-NEXT: s_wait_loadcnt 0x0 ; DAGISEL64-NEXT: s_setpc_b64 s[30:31] ; ; GISEL64-LABEL: ret_64: ; GISEL64: ; %bb.0: ; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL64-NEXT: s_wait_expcnt 0x0 ; GISEL64-NEXT: s_wait_samplecnt 0x0 ; GISEL64-NEXT: s_wait_bvhcnt 0x0 ; GISEL64-NEXT: s_wait_kmcnt 0x0 ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x3 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; 
GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 ; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: v_cndmask_b32_e32 v0, 5, v0, vcc ; GISEL64-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GISEL64-NEXT: v_cndmask_b32_e32 v2, 3, v2, vcc ; GISEL64-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GISEL64-NEXT: v_mov_b32_dpp v0, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; GISEL64-NEXT: v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; GISEL64-NEXT: s_xor_b64 exec, vcc, -1 ; GISEL64-NEXT: s_clause 0x3 ; GISEL64-NEXT: scratch_load_b32 v0, off, s32 ; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; GISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8 ; GISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12 ; GISEL64-NEXT: s_mov_b64 exec, vcc ; GISEL64-NEXT: s_wait_loadcnt 0x0 ; GISEL64-NEXT: s_setpc_b64 s[30:31] %x = select i1 %active, i64 %a, i64 5 %y = select i1 %active, i64 %b, i64 3 %ret = call i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %y, i32 1, i32 1, i32 1, i1 false) ret i64 %ret } ; Make sure inreg arguments are passed in SGPRs (s4-s11 in the checks below) and ; copied into VGPRs before being stored to scratch. The VGPRs used for the copies ; (v0-v5) are saved in the prologue and reloaded in the epilogue, with EXEC ; inverted around those saves and reloads. Note that both %v4i32 and %float are ; stored through %ptr2. define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i32> inreg %v4i32, float inreg %float, ptr addrspace(5) inreg %ptr, ptr addrspace(5) inreg %ptr2) { ; DAGISEL-LABEL: inreg_args: ; DAGISEL: ; %bb.0: ; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL-NEXT: s_wait_expcnt 0x0 ; DAGISEL-NEXT: s_wait_samplecnt 0x0 ; DAGISEL-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL-NEXT: s_wait_kmcnt 0x0 ; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; DAGISEL-NEXT: s_clause 0x5 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 ; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 
offset:20 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s9 ; DAGISEL-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 ; DAGISEL-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8 ; DAGISEL-NEXT: scratch_store_b32 off, v4, s10 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b128 off, v[0:3], s11 ; DAGISEL-NEXT: scratch_store_b32 off, v5, s11 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1 ; DAGISEL-NEXT: s_clause 0x5 ; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 ; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8 ; DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12 ; DAGISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16 ; DAGISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20 ; DAGISEL-NEXT: s_mov_b32 exec_lo, s0 ; DAGISEL-NEXT: s_wait_loadcnt 0x0 ; DAGISEL-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: inreg_args: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL-NEXT: s_wait_expcnt 0x0 ; GISEL-NEXT: s_wait_samplecnt 0x0 ; GISEL-NEXT: s_wait_bvhcnt 0x0 ; GISEL-NEXT: s_wait_kmcnt 0x0 ; GISEL-NEXT: s_xor_saveexec_b32 s34, -1 ; GISEL-NEXT: s_clause 0x5 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 ; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_mov_b32 s0, s5 ; GISEL-NEXT: s_mov_b32 s1, s6 ; GISEL-NEXT: s_mov_b32 s2, s7 ; GISEL-NEXT: s_mov_b32 s3, s8 ; GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 ; GISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 ; GISEL-NEXT: v_mov_b32_e32 v5, s9 ; GISEL-NEXT: scratch_store_b32 off, v4, s10 ; GISEL-NEXT: 
s_clause 0x1 ; GISEL-NEXT: scratch_store_b128 off, v[0:3], s11 ; GISEL-NEXT: scratch_store_b32 off, v5, s11 ; GISEL-NEXT: s_xor_b32 exec_lo, s34, -1 ; GISEL-NEXT: s_clause 0x5 ; GISEL-NEXT: scratch_load_b32 v0, off, s32 ; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; GISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8 ; GISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12 ; GISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16 ; GISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20 ; GISEL-NEXT: s_mov_b32 exec_lo, s34 ; GISEL-NEXT: s_wait_loadcnt 0x0 ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; DAGISEL64-LABEL: inreg_args: ; DAGISEL64: ; %bb.0: ; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL64-NEXT: s_wait_expcnt 0x0 ; DAGISEL64-NEXT: s_wait_samplecnt 0x0 ; DAGISEL64-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL64-NEXT: s_wait_kmcnt 0x0 ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; DAGISEL64-NEXT: s_clause 0x5 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: v_mov_b32_e32 v4, s4 ; DAGISEL64-NEXT: v_mov_b32_e32 v0, s5 ; DAGISEL64-NEXT: v_mov_b32_e32 v1, s6 ; DAGISEL64-NEXT: v_mov_b32_e32 v2, s7 ; DAGISEL64-NEXT: v_mov_b32_e32 v3, s8 ; DAGISEL64-NEXT: v_mov_b32_e32 v5, s9 ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s10 ; DAGISEL64-NEXT: s_clause 0x1 ; DAGISEL64-NEXT: scratch_store_b128 off, v[0:3], s11 ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s11 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_xor_b64 exec, s[0:1], -1 ; DAGISEL64-NEXT: s_clause 0x5 ; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 ; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; DAGISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8 ; 
DAGISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12 ; DAGISEL64-NEXT: scratch_load_b32 v4, off, s32 offset:16 ; DAGISEL64-NEXT: scratch_load_b32 v5, off, s32 offset:20 ; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1] ; DAGISEL64-NEXT: s_wait_loadcnt 0x0 ; DAGISEL64-NEXT: s_setpc_b64 s[30:31] ; ; GISEL64-LABEL: inreg_args: ; GISEL64: ; %bb.0: ; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL64-NEXT: s_wait_expcnt 0x0 ; GISEL64-NEXT: s_wait_samplecnt 0x0 ; GISEL64-NEXT: s_wait_bvhcnt 0x0 ; GISEL64-NEXT: s_wait_kmcnt 0x0 ; GISEL64-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GISEL64-NEXT: s_clause 0x5 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 ; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 ; GISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_mov_b32 s0, s5 ; GISEL64-NEXT: s_mov_b32 s1, s6 ; GISEL64-NEXT: s_mov_b32 s2, s7 ; GISEL64-NEXT: s_mov_b32 s3, s8 ; GISEL64-NEXT: v_mov_b32_e32 v4, s4 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: v_mov_b32_e32 v0, s0 ; GISEL64-NEXT: v_mov_b32_e32 v1, s1 ; GISEL64-NEXT: v_mov_b32_e32 v2, s2 ; GISEL64-NEXT: v_mov_b32_e32 v3, s3 ; GISEL64-NEXT: v_mov_b32_e32 v5, s9 ; GISEL64-NEXT: scratch_store_b32 off, v4, s10 ; GISEL64-NEXT: s_clause 0x1 ; GISEL64-NEXT: scratch_store_b128 off, v[0:3], s11 ; GISEL64-NEXT: scratch_store_b32 off, v5, s11 ; GISEL64-NEXT: s_xor_b64 exec, s[34:35], -1 ; GISEL64-NEXT: s_clause 0x5 ; GISEL64-NEXT: scratch_load_b32 v0, off, s32 ; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4 ; GISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8 ; GISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12 ; GISEL64-NEXT: scratch_load_b32 v4, off, s32 offset:16 ; GISEL64-NEXT: scratch_load_b32 v5, off, s32 offset:20 ; GISEL64-NEXT: s_mov_b64 exec, s[34:35] ; GISEL64-NEXT: s_wait_loadcnt 0x0 ; 
GISEL64-NEXT: s_setpc_b64 s[30:31] store i32 %i32, ptr addrspace(5) %ptr store <4 x i32> %v4i32, ptr addrspace(5) %ptr2 store float %float, ptr addrspace(5) %ptr2 ret void } declare amdgpu_gfx <2 x half> @gfx_callee(<2 x half> %x, <2 x half> %y) define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 x half> %x, <2 x half> %y) { ; DAGISEL-LABEL: call_gfx_from_whole_wave: ; DAGISEL: ; %bb.0: ; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL-NEXT: s_wait_expcnt 0x0 ; DAGISEL-NEXT: s_wait_samplecnt 0x0 ; DAGISEL-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL-NEXT: s_wait_kmcnt 0x0 ; DAGISEL-NEXT: s_mov_b32 s0, s33 ; DAGISEL-NEXT: s_mov_b32 s33, s32 ; DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 ; DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 ; DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 ; DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 ; DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 ; DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 ; DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 ; DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 ; DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 ; DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 ; DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 ; DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 ; DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 ; DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 ; DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 ; DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 ; DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 ; DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 ; DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 ; DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 ; DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 ; DAGISEL-NEXT: scratch_store_b32 off, v21, s33 
offset:88 ; DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 ; DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 ; DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 ; DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 ; DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 ; DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 ; DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 ; DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 ; DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 ; DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 ; DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 ; DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 ; DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 ; DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 ; DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 ; DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 ; DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 ; DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:164 ; DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168 ; DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172 ; DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176 ; DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180 ; DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184 ; DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:188 ; DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192 ; DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196 ; DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200 ; DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204 ; DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208 ; DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212 ; DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216 ; DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220 ; 
DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:224 ; DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228 ; DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232 ; DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236 ; DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240 ; DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244 ; DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248 ; DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252 ; DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:260 ; DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264 ; DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268 ; DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272 ; DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276 ; DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280 ; DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284 ; DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288 ; DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292 ; DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296 ; DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300 ; DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304 ; DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:308 ; DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312 ; DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:316 ; DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320 ; DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324 ; DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328 ; DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332 ; DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336 ; DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340 ; DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344 ; DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:348 ; DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352 ; 
DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356 ; DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360 ; DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364 ; DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368 ; DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372 ; DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376 ; DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380 ; DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388 ; DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:392 ; DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396 ; DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400 ; DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404 ; DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408 ; DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412 ; DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416 ; DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420 ; DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424 ; DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428 ; DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:432 ; DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436 ; DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440 ; DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444 ; DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448 ; DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:452 ; DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456 ; DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460 ; DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464 ; DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468 ; DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472 ; DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476 ; DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480 ; DAGISEL-NEXT: scratch_store_b32 off, v208, s33 
offset:484 ; DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488 ; DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492 ; DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496 ; DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:500 ; DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504 ; DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508 ; DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; DAGISEL-NEXT: s_clause 0xf ; DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:516 ; DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520 ; DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524 ; DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528 ; DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532 ; DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536 ; DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:540 ; DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:544 ; DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548 ; DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552 ; DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556 ; DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560 ; DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564 ; DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568 ; DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:572 ; DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: v_writelane_b32 v40, s0, 3 ; DAGISEL-NEXT: v_mov_b32_e32 v2, v0 ; DAGISEL-NEXT: v_swap_b32 v0, v1 ; DAGISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi ; DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 ; DAGISEL-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo ; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 ; DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: 
s_swappc_b64 s[30:31], s[0:1] ; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; DAGISEL-NEXT: v_readlane_b32 s31, v40, 2 ; DAGISEL-NEXT: v_readlane_b32 s30, v40, 1 ; DAGISEL-NEXT: v_readlane_b32 s4, v40, 0 ; DAGISEL-NEXT: v_readlane_b32 s0, v40, 3 ; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload ; DAGISEL-NEXT: s_mov_b32 s32, s33 ; DAGISEL-NEXT: s_xor_b32 exec_lo, s4, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_load_b32 v0, off, s33 offset:4 ; DAGISEL-NEXT: scratch_load_b32 v1, off, s33 offset:8 ; DAGISEL-NEXT: scratch_load_b32 v2, off, s33 offset:12 ; DAGISEL-NEXT: scratch_load_b32 v3, off, s33 offset:16 ; DAGISEL-NEXT: scratch_load_b32 v4, off, s33 offset:20 ; DAGISEL-NEXT: scratch_load_b32 v5, off, s33 offset:24 ; DAGISEL-NEXT: scratch_load_b32 v6, off, s33 offset:28 ; DAGISEL-NEXT: scratch_load_b32 v7, off, s33 offset:32 ; DAGISEL-NEXT: scratch_load_b32 v8, off, s33 offset:36 ; DAGISEL-NEXT: scratch_load_b32 v9, off, s33 offset:40 ; DAGISEL-NEXT: scratch_load_b32 v10, off, s33 offset:44 ; DAGISEL-NEXT: scratch_load_b32 v11, off, s33 offset:48 ; DAGISEL-NEXT: scratch_load_b32 v12, off, s33 offset:52 ; DAGISEL-NEXT: scratch_load_b32 v13, off, s33 offset:56 ; DAGISEL-NEXT: scratch_load_b32 v14, off, s33 offset:60 ; DAGISEL-NEXT: scratch_load_b32 v15, off, s33 offset:64 ; DAGISEL-NEXT: scratch_load_b32 v16, off, s33 offset:68 ; DAGISEL-NEXT: scratch_load_b32 v17, off, s33 offset:72 ; DAGISEL-NEXT: scratch_load_b32 v18, off, s33 offset:76 ; DAGISEL-NEXT: scratch_load_b32 v19, off, s33 offset:80 ; DAGISEL-NEXT: scratch_load_b32 v20, off, s33 offset:84 ; DAGISEL-NEXT: scratch_load_b32 v21, off, s33 offset:88 ; DAGISEL-NEXT: scratch_load_b32 v22, off, s33 offset:92 ; DAGISEL-NEXT: scratch_load_b32 v23, off, s33 offset:96 ; DAGISEL-NEXT: scratch_load_b32 v24, off, s33 offset:100 ; DAGISEL-NEXT: scratch_load_b32 v25, off, s33 offset:104 ; DAGISEL-NEXT: scratch_load_b32 v26, off, s33 offset:108 ; DAGISEL-NEXT: scratch_load_b32 v27, off, 
s33 offset:112 ; DAGISEL-NEXT: scratch_load_b32 v28, off, s33 offset:116 ; DAGISEL-NEXT: scratch_load_b32 v29, off, s33 offset:120 ; DAGISEL-NEXT: scratch_load_b32 v30, off, s33 offset:124 ; DAGISEL-NEXT: scratch_load_b32 v31, off, s33 offset:128 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_load_b32 v32, off, s33 offset:132 ; DAGISEL-NEXT: scratch_load_b32 v33, off, s33 offset:136 ; DAGISEL-NEXT: scratch_load_b32 v34, off, s33 offset:140 ; DAGISEL-NEXT: scratch_load_b32 v35, off, s33 offset:144 ; DAGISEL-NEXT: scratch_load_b32 v36, off, s33 offset:148 ; DAGISEL-NEXT: scratch_load_b32 v37, off, s33 offset:152 ; DAGISEL-NEXT: scratch_load_b32 v38, off, s33 offset:156 ; DAGISEL-NEXT: scratch_load_b32 v39, off, s33 offset:160 ; DAGISEL-NEXT: scratch_load_b32 v48, off, s33 offset:164 ; DAGISEL-NEXT: scratch_load_b32 v49, off, s33 offset:168 ; DAGISEL-NEXT: scratch_load_b32 v50, off, s33 offset:172 ; DAGISEL-NEXT: scratch_load_b32 v51, off, s33 offset:176 ; DAGISEL-NEXT: scratch_load_b32 v52, off, s33 offset:180 ; DAGISEL-NEXT: scratch_load_b32 v53, off, s33 offset:184 ; DAGISEL-NEXT: scratch_load_b32 v54, off, s33 offset:188 ; DAGISEL-NEXT: scratch_load_b32 v55, off, s33 offset:192 ; DAGISEL-NEXT: scratch_load_b32 v64, off, s33 offset:196 ; DAGISEL-NEXT: scratch_load_b32 v65, off, s33 offset:200 ; DAGISEL-NEXT: scratch_load_b32 v66, off, s33 offset:204 ; DAGISEL-NEXT: scratch_load_b32 v67, off, s33 offset:208 ; DAGISEL-NEXT: scratch_load_b32 v68, off, s33 offset:212 ; DAGISEL-NEXT: scratch_load_b32 v69, off, s33 offset:216 ; DAGISEL-NEXT: scratch_load_b32 v70, off, s33 offset:220 ; DAGISEL-NEXT: scratch_load_b32 v71, off, s33 offset:224 ; DAGISEL-NEXT: scratch_load_b32 v80, off, s33 offset:228 ; DAGISEL-NEXT: scratch_load_b32 v81, off, s33 offset:232 ; DAGISEL-NEXT: scratch_load_b32 v82, off, s33 offset:236 ; DAGISEL-NEXT: scratch_load_b32 v83, off, s33 offset:240 ; DAGISEL-NEXT: scratch_load_b32 v84, off, s33 offset:244 ; DAGISEL-NEXT: scratch_load_b32 v85, 
off, s33 offset:248 ; DAGISEL-NEXT: scratch_load_b32 v86, off, s33 offset:252 ; DAGISEL-NEXT: scratch_load_b32 v87, off, s33 offset:256 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_load_b32 v96, off, s33 offset:260 ; DAGISEL-NEXT: scratch_load_b32 v97, off, s33 offset:264 ; DAGISEL-NEXT: scratch_load_b32 v98, off, s33 offset:268 ; DAGISEL-NEXT: scratch_load_b32 v99, off, s33 offset:272 ; DAGISEL-NEXT: scratch_load_b32 v100, off, s33 offset:276 ; DAGISEL-NEXT: scratch_load_b32 v101, off, s33 offset:280 ; DAGISEL-NEXT: scratch_load_b32 v102, off, s33 offset:284 ; DAGISEL-NEXT: scratch_load_b32 v103, off, s33 offset:288 ; DAGISEL-NEXT: scratch_load_b32 v112, off, s33 offset:292 ; DAGISEL-NEXT: scratch_load_b32 v113, off, s33 offset:296 ; DAGISEL-NEXT: scratch_load_b32 v114, off, s33 offset:300 ; DAGISEL-NEXT: scratch_load_b32 v115, off, s33 offset:304 ; DAGISEL-NEXT: scratch_load_b32 v116, off, s33 offset:308 ; DAGISEL-NEXT: scratch_load_b32 v117, off, s33 offset:312 ; DAGISEL-NEXT: scratch_load_b32 v118, off, s33 offset:316 ; DAGISEL-NEXT: scratch_load_b32 v119, off, s33 offset:320 ; DAGISEL-NEXT: scratch_load_b32 v128, off, s33 offset:324 ; DAGISEL-NEXT: scratch_load_b32 v129, off, s33 offset:328 ; DAGISEL-NEXT: scratch_load_b32 v130, off, s33 offset:332 ; DAGISEL-NEXT: scratch_load_b32 v131, off, s33 offset:336 ; DAGISEL-NEXT: scratch_load_b32 v132, off, s33 offset:340 ; DAGISEL-NEXT: scratch_load_b32 v133, off, s33 offset:344 ; DAGISEL-NEXT: scratch_load_b32 v134, off, s33 offset:348 ; DAGISEL-NEXT: scratch_load_b32 v135, off, s33 offset:352 ; DAGISEL-NEXT: scratch_load_b32 v144, off, s33 offset:356 ; DAGISEL-NEXT: scratch_load_b32 v145, off, s33 offset:360 ; DAGISEL-NEXT: scratch_load_b32 v146, off, s33 offset:364 ; DAGISEL-NEXT: scratch_load_b32 v147, off, s33 offset:368 ; DAGISEL-NEXT: scratch_load_b32 v148, off, s33 offset:372 ; DAGISEL-NEXT: scratch_load_b32 v149, off, s33 offset:376 ; DAGISEL-NEXT: scratch_load_b32 v150, off, s33 offset:380 ; 
DAGISEL-NEXT: scratch_load_b32 v151, off, s33 offset:384 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_load_b32 v160, off, s33 offset:388 ; DAGISEL-NEXT: scratch_load_b32 v161, off, s33 offset:392 ; DAGISEL-NEXT: scratch_load_b32 v162, off, s33 offset:396 ; DAGISEL-NEXT: scratch_load_b32 v163, off, s33 offset:400 ; DAGISEL-NEXT: scratch_load_b32 v164, off, s33 offset:404 ; DAGISEL-NEXT: scratch_load_b32 v165, off, s33 offset:408 ; DAGISEL-NEXT: scratch_load_b32 v166, off, s33 offset:412 ; DAGISEL-NEXT: scratch_load_b32 v167, off, s33 offset:416 ; DAGISEL-NEXT: scratch_load_b32 v176, off, s33 offset:420 ; DAGISEL-NEXT: scratch_load_b32 v177, off, s33 offset:424 ; DAGISEL-NEXT: scratch_load_b32 v178, off, s33 offset:428 ; DAGISEL-NEXT: scratch_load_b32 v179, off, s33 offset:432 ; DAGISEL-NEXT: scratch_load_b32 v180, off, s33 offset:436 ; DAGISEL-NEXT: scratch_load_b32 v181, off, s33 offset:440 ; DAGISEL-NEXT: scratch_load_b32 v182, off, s33 offset:444 ; DAGISEL-NEXT: scratch_load_b32 v183, off, s33 offset:448 ; DAGISEL-NEXT: scratch_load_b32 v192, off, s33 offset:452 ; DAGISEL-NEXT: scratch_load_b32 v193, off, s33 offset:456 ; DAGISEL-NEXT: scratch_load_b32 v194, off, s33 offset:460 ; DAGISEL-NEXT: scratch_load_b32 v195, off, s33 offset:464 ; DAGISEL-NEXT: scratch_load_b32 v196, off, s33 offset:468 ; DAGISEL-NEXT: scratch_load_b32 v197, off, s33 offset:472 ; DAGISEL-NEXT: scratch_load_b32 v198, off, s33 offset:476 ; DAGISEL-NEXT: scratch_load_b32 v199, off, s33 offset:480 ; DAGISEL-NEXT: scratch_load_b32 v208, off, s33 offset:484 ; DAGISEL-NEXT: scratch_load_b32 v209, off, s33 offset:488 ; DAGISEL-NEXT: scratch_load_b32 v210, off, s33 offset:492 ; DAGISEL-NEXT: scratch_load_b32 v211, off, s33 offset:496 ; DAGISEL-NEXT: scratch_load_b32 v212, off, s33 offset:500 ; DAGISEL-NEXT: scratch_load_b32 v213, off, s33 offset:504 ; DAGISEL-NEXT: scratch_load_b32 v214, off, s33 offset:508 ; DAGISEL-NEXT: scratch_load_b32 v215, off, s33 offset:512 ; DAGISEL-NEXT: s_clause 
0xf ; DAGISEL-NEXT: scratch_load_b32 v224, off, s33 offset:516 ; DAGISEL-NEXT: scratch_load_b32 v225, off, s33 offset:520 ; DAGISEL-NEXT: scratch_load_b32 v226, off, s33 offset:524 ; DAGISEL-NEXT: scratch_load_b32 v227, off, s33 offset:528 ; DAGISEL-NEXT: scratch_load_b32 v228, off, s33 offset:532 ; DAGISEL-NEXT: scratch_load_b32 v229, off, s33 offset:536 ; DAGISEL-NEXT: scratch_load_b32 v230, off, s33 offset:540 ; DAGISEL-NEXT: scratch_load_b32 v231, off, s33 offset:544 ; DAGISEL-NEXT: scratch_load_b32 v240, off, s33 offset:548 ; DAGISEL-NEXT: scratch_load_b32 v241, off, s33 offset:552 ; DAGISEL-NEXT: scratch_load_b32 v242, off, s33 offset:556 ; DAGISEL-NEXT: scratch_load_b32 v243, off, s33 offset:560 ; DAGISEL-NEXT: scratch_load_b32 v244, off, s33 offset:564 ; DAGISEL-NEXT: scratch_load_b32 v245, off, s33 offset:568 ; DAGISEL-NEXT: scratch_load_b32 v246, off, s33 offset:572 ; DAGISEL-NEXT: scratch_load_b32 v247, off, s33 offset:576 ; DAGISEL-NEXT: s_mov_b32 exec_lo, s4 ; DAGISEL-NEXT: s_mov_b32 s33, s0 ; DAGISEL-NEXT: s_wait_loadcnt 0x0 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: call_gfx_from_whole_wave: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL-NEXT: s_wait_expcnt 0x0 ; GISEL-NEXT: s_wait_samplecnt 0x0 ; GISEL-NEXT: s_wait_bvhcnt 0x0 ; GISEL-NEXT: s_wait_kmcnt 0x0 ; GISEL-NEXT: s_mov_b32 s0, s33 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 ; GISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 ; GISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 ; GISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 ; GISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 ; GISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 ; GISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 ; GISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 ; GISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 ; GISEL-NEXT: 
scratch_store_b32 off, v9, s33 offset:40 ; GISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 ; GISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 ; GISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 ; GISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 ; GISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 ; GISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 ; GISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 ; GISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 ; GISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 ; GISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 ; GISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 ; GISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 ; GISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 ; GISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 ; GISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 ; GISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 ; GISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 ; GISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 ; GISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 ; GISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 ; GISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 ; GISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 ; GISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 ; GISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 ; GISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 ; GISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 ; GISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 ; GISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 ; GISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 ; GISEL-NEXT: scratch_store_b32 off, v48, s33 offset:164 ; GISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168 ; GISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172 ; GISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176 ; 
GISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180 ; GISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184 ; GISEL-NEXT: scratch_store_b32 off, v54, s33 offset:188 ; GISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192 ; GISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196 ; GISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200 ; GISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204 ; GISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208 ; GISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212 ; GISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216 ; GISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220 ; GISEL-NEXT: scratch_store_b32 off, v71, s33 offset:224 ; GISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228 ; GISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232 ; GISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236 ; GISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240 ; GISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244 ; GISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248 ; GISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252 ; GISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v96, s33 offset:260 ; GISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264 ; GISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268 ; GISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272 ; GISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276 ; GISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280 ; GISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284 ; GISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288 ; GISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292 ; GISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296 ; GISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300 ; GISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304 ; GISEL-NEXT: scratch_store_b32 off, v116, s33 offset:308 ; GISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312 ; GISEL-NEXT: 
scratch_store_b32 off, v118, s33 offset:316 ; GISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320 ; GISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324 ; GISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328 ; GISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332 ; GISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336 ; GISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340 ; GISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344 ; GISEL-NEXT: scratch_store_b32 off, v134, s33 offset:348 ; GISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352 ; GISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356 ; GISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360 ; GISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364 ; GISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368 ; GISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372 ; GISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376 ; GISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380 ; GISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388 ; GISEL-NEXT: scratch_store_b32 off, v161, s33 offset:392 ; GISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396 ; GISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400 ; GISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404 ; GISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408 ; GISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412 ; GISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416 ; GISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420 ; GISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424 ; GISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428 ; GISEL-NEXT: scratch_store_b32 off, v179, s33 offset:432 ; GISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436 ; GISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440 ; GISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444 ; GISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448 ; GISEL-NEXT: 
scratch_store_b32 off, v192, s33 offset:452 ; GISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456 ; GISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460 ; GISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464 ; GISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468 ; GISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472 ; GISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476 ; GISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480 ; GISEL-NEXT: scratch_store_b32 off, v208, s33 offset:484 ; GISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488 ; GISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492 ; GISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496 ; GISEL-NEXT: scratch_store_b32 off, v212, s33 offset:500 ; GISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504 ; GISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508 ; GISEL-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_store_b32 off, v224, s33 offset:516 ; GISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520 ; GISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524 ; GISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528 ; GISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532 ; GISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536 ; GISEL-NEXT: scratch_store_b32 off, v230, s33 offset:540 ; GISEL-NEXT: scratch_store_b32 off, v231, s33 offset:544 ; GISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548 ; GISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552 ; GISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556 ; GISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560 ; GISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564 ; GISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568 ; GISEL-NEXT: scratch_store_b32 off, v246, s33 offset:572 ; GISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_wait_alu 
0xfffe ; GISEL-NEXT: v_writelane_b32 v40, s0, 3 ; GISEL-NEXT: v_mov_b32_e32 v2, v0 ; GISEL-NEXT: v_swap_b32 v0, v1 ; GISEL-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo ; GISEL-NEXT: v_writelane_b32 v40, s4, 0 ; GISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi ; GISEL-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL-NEXT: v_writelane_b32 v40, s30, 1 ; GISEL-NEXT: v_writelane_b32 v40, s31, 2 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL-NEXT: v_readlane_b32 s31, v40, 2 ; GISEL-NEXT: v_readlane_b32 s30, v40, 1 ; GISEL-NEXT: v_readlane_b32 s4, v40, 0 ; GISEL-NEXT: v_readlane_b32 s0, v40, 3 ; GISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: s_xor_b32 exec_lo, s4, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_load_b32 v0, off, s33 offset:4 ; GISEL-NEXT: scratch_load_b32 v1, off, s33 offset:8 ; GISEL-NEXT: scratch_load_b32 v2, off, s33 offset:12 ; GISEL-NEXT: scratch_load_b32 v3, off, s33 offset:16 ; GISEL-NEXT: scratch_load_b32 v4, off, s33 offset:20 ; GISEL-NEXT: scratch_load_b32 v5, off, s33 offset:24 ; GISEL-NEXT: scratch_load_b32 v6, off, s33 offset:28 ; GISEL-NEXT: scratch_load_b32 v7, off, s33 offset:32 ; GISEL-NEXT: scratch_load_b32 v8, off, s33 offset:36 ; GISEL-NEXT: scratch_load_b32 v9, off, s33 offset:40 ; GISEL-NEXT: scratch_load_b32 v10, off, s33 offset:44 ; GISEL-NEXT: scratch_load_b32 v11, off, s33 offset:48 ; GISEL-NEXT: scratch_load_b32 v12, off, s33 offset:52 ; GISEL-NEXT: scratch_load_b32 v13, off, s33 offset:56 ; GISEL-NEXT: scratch_load_b32 v14, off, s33 offset:60 ; GISEL-NEXT: scratch_load_b32 v15, off, s33 offset:64 ; GISEL-NEXT: scratch_load_b32 v16, off, s33 offset:68 ; GISEL-NEXT: scratch_load_b32 v17, off, s33 offset:72 ; GISEL-NEXT: scratch_load_b32 v18, off, s33 offset:76 ; GISEL-NEXT: scratch_load_b32 v19, off, s33 offset:80 ; GISEL-NEXT: scratch_load_b32 v20, off, s33 offset:84 ; GISEL-NEXT: 
scratch_load_b32 v21, off, s33 offset:88 ; GISEL-NEXT: scratch_load_b32 v22, off, s33 offset:92 ; GISEL-NEXT: scratch_load_b32 v23, off, s33 offset:96 ; GISEL-NEXT: scratch_load_b32 v24, off, s33 offset:100 ; GISEL-NEXT: scratch_load_b32 v25, off, s33 offset:104 ; GISEL-NEXT: scratch_load_b32 v26, off, s33 offset:108 ; GISEL-NEXT: scratch_load_b32 v27, off, s33 offset:112 ; GISEL-NEXT: scratch_load_b32 v28, off, s33 offset:116 ; GISEL-NEXT: scratch_load_b32 v29, off, s33 offset:120 ; GISEL-NEXT: scratch_load_b32 v30, off, s33 offset:124 ; GISEL-NEXT: scratch_load_b32 v31, off, s33 offset:128 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_load_b32 v32, off, s33 offset:132 ; GISEL-NEXT: scratch_load_b32 v33, off, s33 offset:136 ; GISEL-NEXT: scratch_load_b32 v34, off, s33 offset:140 ; GISEL-NEXT: scratch_load_b32 v35, off, s33 offset:144 ; GISEL-NEXT: scratch_load_b32 v36, off, s33 offset:148 ; GISEL-NEXT: scratch_load_b32 v37, off, s33 offset:152 ; GISEL-NEXT: scratch_load_b32 v38, off, s33 offset:156 ; GISEL-NEXT: scratch_load_b32 v39, off, s33 offset:160 ; GISEL-NEXT: scratch_load_b32 v48, off, s33 offset:164 ; GISEL-NEXT: scratch_load_b32 v49, off, s33 offset:168 ; GISEL-NEXT: scratch_load_b32 v50, off, s33 offset:172 ; GISEL-NEXT: scratch_load_b32 v51, off, s33 offset:176 ; GISEL-NEXT: scratch_load_b32 v52, off, s33 offset:180 ; GISEL-NEXT: scratch_load_b32 v53, off, s33 offset:184 ; GISEL-NEXT: scratch_load_b32 v54, off, s33 offset:188 ; GISEL-NEXT: scratch_load_b32 v55, off, s33 offset:192 ; GISEL-NEXT: scratch_load_b32 v64, off, s33 offset:196 ; GISEL-NEXT: scratch_load_b32 v65, off, s33 offset:200 ; GISEL-NEXT: scratch_load_b32 v66, off, s33 offset:204 ; GISEL-NEXT: scratch_load_b32 v67, off, s33 offset:208 ; GISEL-NEXT: scratch_load_b32 v68, off, s33 offset:212 ; GISEL-NEXT: scratch_load_b32 v69, off, s33 offset:216 ; GISEL-NEXT: scratch_load_b32 v70, off, s33 offset:220 ; GISEL-NEXT: scratch_load_b32 v71, off, s33 offset:224 ; GISEL-NEXT: 
scratch_load_b32 v80, off, s33 offset:228 ; GISEL-NEXT: scratch_load_b32 v81, off, s33 offset:232 ; GISEL-NEXT: scratch_load_b32 v82, off, s33 offset:236 ; GISEL-NEXT: scratch_load_b32 v83, off, s33 offset:240 ; GISEL-NEXT: scratch_load_b32 v84, off, s33 offset:244 ; GISEL-NEXT: scratch_load_b32 v85, off, s33 offset:248 ; GISEL-NEXT: scratch_load_b32 v86, off, s33 offset:252 ; GISEL-NEXT: scratch_load_b32 v87, off, s33 offset:256 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_load_b32 v96, off, s33 offset:260 ; GISEL-NEXT: scratch_load_b32 v97, off, s33 offset:264 ; GISEL-NEXT: scratch_load_b32 v98, off, s33 offset:268 ; GISEL-NEXT: scratch_load_b32 v99, off, s33 offset:272 ; GISEL-NEXT: scratch_load_b32 v100, off, s33 offset:276 ; GISEL-NEXT: scratch_load_b32 v101, off, s33 offset:280 ; GISEL-NEXT: scratch_load_b32 v102, off, s33 offset:284 ; GISEL-NEXT: scratch_load_b32 v103, off, s33 offset:288 ; GISEL-NEXT: scratch_load_b32 v112, off, s33 offset:292 ; GISEL-NEXT: scratch_load_b32 v113, off, s33 offset:296 ; GISEL-NEXT: scratch_load_b32 v114, off, s33 offset:300 ; GISEL-NEXT: scratch_load_b32 v115, off, s33 offset:304 ; GISEL-NEXT: scratch_load_b32 v116, off, s33 offset:308 ; GISEL-NEXT: scratch_load_b32 v117, off, s33 offset:312 ; GISEL-NEXT: scratch_load_b32 v118, off, s33 offset:316 ; GISEL-NEXT: scratch_load_b32 v119, off, s33 offset:320 ; GISEL-NEXT: scratch_load_b32 v128, off, s33 offset:324 ; GISEL-NEXT: scratch_load_b32 v129, off, s33 offset:328 ; GISEL-NEXT: scratch_load_b32 v130, off, s33 offset:332 ; GISEL-NEXT: scratch_load_b32 v131, off, s33 offset:336 ; GISEL-NEXT: scratch_load_b32 v132, off, s33 offset:340 ; GISEL-NEXT: scratch_load_b32 v133, off, s33 offset:344 ; GISEL-NEXT: scratch_load_b32 v134, off, s33 offset:348 ; GISEL-NEXT: scratch_load_b32 v135, off, s33 offset:352 ; GISEL-NEXT: scratch_load_b32 v144, off, s33 offset:356 ; GISEL-NEXT: scratch_load_b32 v145, off, s33 offset:360 ; GISEL-NEXT: scratch_load_b32 v146, off, s33 offset:364 ; 
GISEL-NEXT: scratch_load_b32 v147, off, s33 offset:368 ; GISEL-NEXT: scratch_load_b32 v148, off, s33 offset:372 ; GISEL-NEXT: scratch_load_b32 v149, off, s33 offset:376 ; GISEL-NEXT: scratch_load_b32 v150, off, s33 offset:380 ; GISEL-NEXT: scratch_load_b32 v151, off, s33 offset:384 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_load_b32 v160, off, s33 offset:388 ; GISEL-NEXT: scratch_load_b32 v161, off, s33 offset:392 ; GISEL-NEXT: scratch_load_b32 v162, off, s33 offset:396 ; GISEL-NEXT: scratch_load_b32 v163, off, s33 offset:400 ; GISEL-NEXT: scratch_load_b32 v164, off, s33 offset:404 ; GISEL-NEXT: scratch_load_b32 v165, off, s33 offset:408 ; GISEL-NEXT: scratch_load_b32 v166, off, s33 offset:412 ; GISEL-NEXT: scratch_load_b32 v167, off, s33 offset:416 ; GISEL-NEXT: scratch_load_b32 v176, off, s33 offset:420 ; GISEL-NEXT: scratch_load_b32 v177, off, s33 offset:424 ; GISEL-NEXT: scratch_load_b32 v178, off, s33 offset:428 ; GISEL-NEXT: scratch_load_b32 v179, off, s33 offset:432 ; GISEL-NEXT: scratch_load_b32 v180, off, s33 offset:436 ; GISEL-NEXT: scratch_load_b32 v181, off, s33 offset:440 ; GISEL-NEXT: scratch_load_b32 v182, off, s33 offset:444 ; GISEL-NEXT: scratch_load_b32 v183, off, s33 offset:448 ; GISEL-NEXT: scratch_load_b32 v192, off, s33 offset:452 ; GISEL-NEXT: scratch_load_b32 v193, off, s33 offset:456 ; GISEL-NEXT: scratch_load_b32 v194, off, s33 offset:460 ; GISEL-NEXT: scratch_load_b32 v195, off, s33 offset:464 ; GISEL-NEXT: scratch_load_b32 v196, off, s33 offset:468 ; GISEL-NEXT: scratch_load_b32 v197, off, s33 offset:472 ; GISEL-NEXT: scratch_load_b32 v198, off, s33 offset:476 ; GISEL-NEXT: scratch_load_b32 v199, off, s33 offset:480 ; GISEL-NEXT: scratch_load_b32 v208, off, s33 offset:484 ; GISEL-NEXT: scratch_load_b32 v209, off, s33 offset:488 ; GISEL-NEXT: scratch_load_b32 v210, off, s33 offset:492 ; GISEL-NEXT: scratch_load_b32 v211, off, s33 offset:496 ; GISEL-NEXT: scratch_load_b32 v212, off, s33 offset:500 ; GISEL-NEXT: scratch_load_b32 
v213, off, s33 offset:504 ; GISEL-NEXT: scratch_load_b32 v214, off, s33 offset:508 ; GISEL-NEXT: scratch_load_b32 v215, off, s33 offset:512 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_load_b32 v224, off, s33 offset:516 ; GISEL-NEXT: scratch_load_b32 v225, off, s33 offset:520 ; GISEL-NEXT: scratch_load_b32 v226, off, s33 offset:524 ; GISEL-NEXT: scratch_load_b32 v227, off, s33 offset:528 ; GISEL-NEXT: scratch_load_b32 v228, off, s33 offset:532 ; GISEL-NEXT: scratch_load_b32 v229, off, s33 offset:536 ; GISEL-NEXT: scratch_load_b32 v230, off, s33 offset:540 ; GISEL-NEXT: scratch_load_b32 v231, off, s33 offset:544 ; GISEL-NEXT: scratch_load_b32 v240, off, s33 offset:548 ; GISEL-NEXT: scratch_load_b32 v241, off, s33 offset:552 ; GISEL-NEXT: scratch_load_b32 v242, off, s33 offset:556 ; GISEL-NEXT: scratch_load_b32 v243, off, s33 offset:560 ; GISEL-NEXT: scratch_load_b32 v244, off, s33 offset:564 ; GISEL-NEXT: scratch_load_b32 v245, off, s33 offset:568 ; GISEL-NEXT: scratch_load_b32 v246, off, s33 offset:572 ; GISEL-NEXT: scratch_load_b32 v247, off, s33 offset:576 ; GISEL-NEXT: s_mov_b32 exec_lo, s4 ; GISEL-NEXT: s_mov_b32 s33, s0 ; GISEL-NEXT: s_wait_loadcnt 0x0 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; DAGISEL64-LABEL: call_gfx_from_whole_wave: ; DAGISEL64: ; %bb.0: ; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; DAGISEL64-NEXT: s_wait_expcnt 0x0 ; DAGISEL64-NEXT: s_wait_samplecnt 0x0 ; DAGISEL64-NEXT: s_wait_bvhcnt 0x0 ; DAGISEL64-NEXT: s_wait_kmcnt 0x0 ; DAGISEL64-NEXT: s_mov_b32 s0, s33 ; DAGISEL64-NEXT: s_mov_b32 s33, s32 ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s33 
offset:24 ; DAGISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 ; DAGISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 ; DAGISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 ; DAGISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 ; DAGISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 ; DAGISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 ; DAGISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 ; DAGISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 ; DAGISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 ; DAGISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 ; DAGISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 ; DAGISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 ; DAGISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 ; DAGISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 ; DAGISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 ; DAGISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 ; DAGISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 ; DAGISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 ; DAGISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 ; DAGISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 ; DAGISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 ; DAGISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 ; DAGISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 ; DAGISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 ; DAGISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 ; DAGISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 ; DAGISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 ; DAGISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 ; DAGISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 ; DAGISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 ; DAGISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 ; DAGISEL64-NEXT: 
scratch_store_b32 off, v38, s33 offset:156 ; DAGISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 ; DAGISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:164 ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:168 ; DAGISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:172 ; DAGISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:176 ; DAGISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:180 ; DAGISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:184 ; DAGISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:188 ; DAGISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:192 ; DAGISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:196 ; DAGISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:200 ; DAGISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:204 ; DAGISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:208 ; DAGISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:212 ; DAGISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:216 ; DAGISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:220 ; DAGISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:224 ; DAGISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:228 ; DAGISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:232 ; DAGISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:236 ; DAGISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:240 ; DAGISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:244 ; DAGISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:248 ; DAGISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:252 ; DAGISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:260 ; DAGISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:264 ; DAGISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:268 ; DAGISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:272 ; DAGISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:276 ; DAGISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:280 ; DAGISEL64-NEXT: 
scratch_store_b32 off, v102, s33 offset:284 ; DAGISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:288 ; DAGISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:292 ; DAGISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:296 ; DAGISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:300 ; DAGISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:304 ; DAGISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:308 ; DAGISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:312 ; DAGISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:316 ; DAGISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:320 ; DAGISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:324 ; DAGISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:328 ; DAGISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:332 ; DAGISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:336 ; DAGISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:340 ; DAGISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:344 ; DAGISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:348 ; DAGISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:352 ; DAGISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:356 ; DAGISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:360 ; DAGISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:364 ; DAGISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:368 ; DAGISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:372 ; DAGISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:376 ; DAGISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:380 ; DAGISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:388 ; DAGISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:392 ; DAGISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:396 ; DAGISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:400 ; DAGISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:404 ; DAGISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:408 ; 
DAGISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:412 ; DAGISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:416 ; DAGISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:420 ; DAGISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:424 ; DAGISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:428 ; DAGISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:432 ; DAGISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:436 ; DAGISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:440 ; DAGISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:444 ; DAGISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:448 ; DAGISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:452 ; DAGISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:456 ; DAGISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:460 ; DAGISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:464 ; DAGISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:468 ; DAGISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:472 ; DAGISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:476 ; DAGISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:480 ; DAGISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:484 ; DAGISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:488 ; DAGISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:492 ; DAGISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:496 ; DAGISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:500 ; DAGISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:504 ; DAGISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:508 ; DAGISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; DAGISEL64-NEXT: s_clause 0xf ; DAGISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:516 ; DAGISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:520 ; DAGISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:524 ; DAGISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:528 ; DAGISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:532 ; DAGISEL64-NEXT: scratch_store_b32 off, v229, 
s33 offset:536 ; DAGISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:540 ; DAGISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:544 ; DAGISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:548 ; DAGISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:552 ; DAGISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:556 ; DAGISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:560 ; DAGISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:564 ; DAGISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:568 ; DAGISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:572 ; DAGISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: v_writelane_b32 v40, s0, 4 ; DAGISEL64-NEXT: v_mov_b32_e32 v2, v0 ; DAGISEL64-NEXT: v_swap_b32 v0, v1 ; DAGISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi ; DAGISEL64-NEXT: v_writelane_b32 v40, s4, 0 ; DAGISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo ; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; DAGISEL64-NEXT: v_writelane_b32 v40, s5, 1 ; DAGISEL64-NEXT: v_writelane_b32 v40, s30, 2 ; DAGISEL64-NEXT: v_writelane_b32 v40, s31, 3 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] ; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) ; DAGISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; DAGISEL64-NEXT: v_readlane_b32 s30, v40, 2 ; DAGISEL64-NEXT: v_readlane_b32 s5, v40, 1 ; DAGISEL64-NEXT: v_readlane_b32 s4, v40, 0 ; DAGISEL64-NEXT: v_readlane_b32 s0, v40, 4 ; DAGISEL64-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload ; DAGISEL64-NEXT: s_mov_b32 s32, s33 ; DAGISEL64-NEXT: s_xor_b64 exec, s[4:5], -1 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_load_b32 v0, off, s33 offset:4 ; DAGISEL64-NEXT: scratch_load_b32 v1, off, s33 offset:8 ; DAGISEL64-NEXT: scratch_load_b32 v2, off, s33 offset:12 ; DAGISEL64-NEXT: scratch_load_b32 v3, off, s33 
offset:16 ; DAGISEL64-NEXT: scratch_load_b32 v4, off, s33 offset:20 ; DAGISEL64-NEXT: scratch_load_b32 v5, off, s33 offset:24 ; DAGISEL64-NEXT: scratch_load_b32 v6, off, s33 offset:28 ; DAGISEL64-NEXT: scratch_load_b32 v7, off, s33 offset:32 ; DAGISEL64-NEXT: scratch_load_b32 v8, off, s33 offset:36 ; DAGISEL64-NEXT: scratch_load_b32 v9, off, s33 offset:40 ; DAGISEL64-NEXT: scratch_load_b32 v10, off, s33 offset:44 ; DAGISEL64-NEXT: scratch_load_b32 v11, off, s33 offset:48 ; DAGISEL64-NEXT: scratch_load_b32 v12, off, s33 offset:52 ; DAGISEL64-NEXT: scratch_load_b32 v13, off, s33 offset:56 ; DAGISEL64-NEXT: scratch_load_b32 v14, off, s33 offset:60 ; DAGISEL64-NEXT: scratch_load_b32 v15, off, s33 offset:64 ; DAGISEL64-NEXT: scratch_load_b32 v16, off, s33 offset:68 ; DAGISEL64-NEXT: scratch_load_b32 v17, off, s33 offset:72 ; DAGISEL64-NEXT: scratch_load_b32 v18, off, s33 offset:76 ; DAGISEL64-NEXT: scratch_load_b32 v19, off, s33 offset:80 ; DAGISEL64-NEXT: scratch_load_b32 v20, off, s33 offset:84 ; DAGISEL64-NEXT: scratch_load_b32 v21, off, s33 offset:88 ; DAGISEL64-NEXT: scratch_load_b32 v22, off, s33 offset:92 ; DAGISEL64-NEXT: scratch_load_b32 v23, off, s33 offset:96 ; DAGISEL64-NEXT: scratch_load_b32 v24, off, s33 offset:100 ; DAGISEL64-NEXT: scratch_load_b32 v25, off, s33 offset:104 ; DAGISEL64-NEXT: scratch_load_b32 v26, off, s33 offset:108 ; DAGISEL64-NEXT: scratch_load_b32 v27, off, s33 offset:112 ; DAGISEL64-NEXT: scratch_load_b32 v28, off, s33 offset:116 ; DAGISEL64-NEXT: scratch_load_b32 v29, off, s33 offset:120 ; DAGISEL64-NEXT: scratch_load_b32 v30, off, s33 offset:124 ; DAGISEL64-NEXT: scratch_load_b32 v31, off, s33 offset:128 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_load_b32 v32, off, s33 offset:132 ; DAGISEL64-NEXT: scratch_load_b32 v33, off, s33 offset:136 ; DAGISEL64-NEXT: scratch_load_b32 v34, off, s33 offset:140 ; DAGISEL64-NEXT: scratch_load_b32 v35, off, s33 offset:144 ; DAGISEL64-NEXT: scratch_load_b32 v36, off, s33 offset:148 ; 
DAGISEL64-NEXT: scratch_load_b32 v37, off, s33 offset:152 ; DAGISEL64-NEXT: scratch_load_b32 v38, off, s33 offset:156 ; DAGISEL64-NEXT: scratch_load_b32 v39, off, s33 offset:160 ; DAGISEL64-NEXT: scratch_load_b32 v48, off, s33 offset:164 ; DAGISEL64-NEXT: scratch_load_b32 v49, off, s33 offset:168 ; DAGISEL64-NEXT: scratch_load_b32 v50, off, s33 offset:172 ; DAGISEL64-NEXT: scratch_load_b32 v51, off, s33 offset:176 ; DAGISEL64-NEXT: scratch_load_b32 v52, off, s33 offset:180 ; DAGISEL64-NEXT: scratch_load_b32 v53, off, s33 offset:184 ; DAGISEL64-NEXT: scratch_load_b32 v54, off, s33 offset:188 ; DAGISEL64-NEXT: scratch_load_b32 v55, off, s33 offset:192 ; DAGISEL64-NEXT: scratch_load_b32 v64, off, s33 offset:196 ; DAGISEL64-NEXT: scratch_load_b32 v65, off, s33 offset:200 ; DAGISEL64-NEXT: scratch_load_b32 v66, off, s33 offset:204 ; DAGISEL64-NEXT: scratch_load_b32 v67, off, s33 offset:208 ; DAGISEL64-NEXT: scratch_load_b32 v68, off, s33 offset:212 ; DAGISEL64-NEXT: scratch_load_b32 v69, off, s33 offset:216 ; DAGISEL64-NEXT: scratch_load_b32 v70, off, s33 offset:220 ; DAGISEL64-NEXT: scratch_load_b32 v71, off, s33 offset:224 ; DAGISEL64-NEXT: scratch_load_b32 v80, off, s33 offset:228 ; DAGISEL64-NEXT: scratch_load_b32 v81, off, s33 offset:232 ; DAGISEL64-NEXT: scratch_load_b32 v82, off, s33 offset:236 ; DAGISEL64-NEXT: scratch_load_b32 v83, off, s33 offset:240 ; DAGISEL64-NEXT: scratch_load_b32 v84, off, s33 offset:244 ; DAGISEL64-NEXT: scratch_load_b32 v85, off, s33 offset:248 ; DAGISEL64-NEXT: scratch_load_b32 v86, off, s33 offset:252 ; DAGISEL64-NEXT: scratch_load_b32 v87, off, s33 offset:256 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_load_b32 v96, off, s33 offset:260 ; DAGISEL64-NEXT: scratch_load_b32 v97, off, s33 offset:264 ; DAGISEL64-NEXT: scratch_load_b32 v98, off, s33 offset:268 ; DAGISEL64-NEXT: scratch_load_b32 v99, off, s33 offset:272 ; DAGISEL64-NEXT: scratch_load_b32 v100, off, s33 offset:276 ; DAGISEL64-NEXT: scratch_load_b32 v101, off, 
s33 offset:280 ; DAGISEL64-NEXT: scratch_load_b32 v102, off, s33 offset:284 ; DAGISEL64-NEXT: scratch_load_b32 v103, off, s33 offset:288 ; DAGISEL64-NEXT: scratch_load_b32 v112, off, s33 offset:292 ; DAGISEL64-NEXT: scratch_load_b32 v113, off, s33 offset:296 ; DAGISEL64-NEXT: scratch_load_b32 v114, off, s33 offset:300 ; DAGISEL64-NEXT: scratch_load_b32 v115, off, s33 offset:304 ; DAGISEL64-NEXT: scratch_load_b32 v116, off, s33 offset:308 ; DAGISEL64-NEXT: scratch_load_b32 v117, off, s33 offset:312 ; DAGISEL64-NEXT: scratch_load_b32 v118, off, s33 offset:316 ; DAGISEL64-NEXT: scratch_load_b32 v119, off, s33 offset:320 ; DAGISEL64-NEXT: scratch_load_b32 v128, off, s33 offset:324 ; DAGISEL64-NEXT: scratch_load_b32 v129, off, s33 offset:328 ; DAGISEL64-NEXT: scratch_load_b32 v130, off, s33 offset:332 ; DAGISEL64-NEXT: scratch_load_b32 v131, off, s33 offset:336 ; DAGISEL64-NEXT: scratch_load_b32 v132, off, s33 offset:340 ; DAGISEL64-NEXT: scratch_load_b32 v133, off, s33 offset:344 ; DAGISEL64-NEXT: scratch_load_b32 v134, off, s33 offset:348 ; DAGISEL64-NEXT: scratch_load_b32 v135, off, s33 offset:352 ; DAGISEL64-NEXT: scratch_load_b32 v144, off, s33 offset:356 ; DAGISEL64-NEXT: scratch_load_b32 v145, off, s33 offset:360 ; DAGISEL64-NEXT: scratch_load_b32 v146, off, s33 offset:364 ; DAGISEL64-NEXT: scratch_load_b32 v147, off, s33 offset:368 ; DAGISEL64-NEXT: scratch_load_b32 v148, off, s33 offset:372 ; DAGISEL64-NEXT: scratch_load_b32 v149, off, s33 offset:376 ; DAGISEL64-NEXT: scratch_load_b32 v150, off, s33 offset:380 ; DAGISEL64-NEXT: scratch_load_b32 v151, off, s33 offset:384 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_load_b32 v160, off, s33 offset:388 ; DAGISEL64-NEXT: scratch_load_b32 v161, off, s33 offset:392 ; DAGISEL64-NEXT: scratch_load_b32 v162, off, s33 offset:396 ; DAGISEL64-NEXT: scratch_load_b32 v163, off, s33 offset:400 ; DAGISEL64-NEXT: scratch_load_b32 v164, off, s33 offset:404 ; DAGISEL64-NEXT: scratch_load_b32 v165, off, s33 offset:408 
; DAGISEL64-NEXT: scratch_load_b32 v166, off, s33 offset:412 ; DAGISEL64-NEXT: scratch_load_b32 v167, off, s33 offset:416 ; DAGISEL64-NEXT: scratch_load_b32 v176, off, s33 offset:420 ; DAGISEL64-NEXT: scratch_load_b32 v177, off, s33 offset:424 ; DAGISEL64-NEXT: scratch_load_b32 v178, off, s33 offset:428 ; DAGISEL64-NEXT: scratch_load_b32 v179, off, s33 offset:432 ; DAGISEL64-NEXT: scratch_load_b32 v180, off, s33 offset:436 ; DAGISEL64-NEXT: scratch_load_b32 v181, off, s33 offset:440 ; DAGISEL64-NEXT: scratch_load_b32 v182, off, s33 offset:444 ; DAGISEL64-NEXT: scratch_load_b32 v183, off, s33 offset:448 ; DAGISEL64-NEXT: scratch_load_b32 v192, off, s33 offset:452 ; DAGISEL64-NEXT: scratch_load_b32 v193, off, s33 offset:456 ; DAGISEL64-NEXT: scratch_load_b32 v194, off, s33 offset:460 ; DAGISEL64-NEXT: scratch_load_b32 v195, off, s33 offset:464 ; DAGISEL64-NEXT: scratch_load_b32 v196, off, s33 offset:468 ; DAGISEL64-NEXT: scratch_load_b32 v197, off, s33 offset:472 ; DAGISEL64-NEXT: scratch_load_b32 v198, off, s33 offset:476 ; DAGISEL64-NEXT: scratch_load_b32 v199, off, s33 offset:480 ; DAGISEL64-NEXT: scratch_load_b32 v208, off, s33 offset:484 ; DAGISEL64-NEXT: scratch_load_b32 v209, off, s33 offset:488 ; DAGISEL64-NEXT: scratch_load_b32 v210, off, s33 offset:492 ; DAGISEL64-NEXT: scratch_load_b32 v211, off, s33 offset:496 ; DAGISEL64-NEXT: scratch_load_b32 v212, off, s33 offset:500 ; DAGISEL64-NEXT: scratch_load_b32 v213, off, s33 offset:504 ; DAGISEL64-NEXT: scratch_load_b32 v214, off, s33 offset:508 ; DAGISEL64-NEXT: scratch_load_b32 v215, off, s33 offset:512 ; DAGISEL64-NEXT: s_clause 0xf ; DAGISEL64-NEXT: scratch_load_b32 v224, off, s33 offset:516 ; DAGISEL64-NEXT: scratch_load_b32 v225, off, s33 offset:520 ; DAGISEL64-NEXT: scratch_load_b32 v226, off, s33 offset:524 ; DAGISEL64-NEXT: scratch_load_b32 v227, off, s33 offset:528 ; DAGISEL64-NEXT: scratch_load_b32 v228, off, s33 offset:532 ; DAGISEL64-NEXT: scratch_load_b32 v229, off, s33 offset:536 ; 
DAGISEL64-NEXT: scratch_load_b32 v230, off, s33 offset:540 ; DAGISEL64-NEXT: scratch_load_b32 v231, off, s33 offset:544 ; DAGISEL64-NEXT: scratch_load_b32 v240, off, s33 offset:548 ; DAGISEL64-NEXT: scratch_load_b32 v241, off, s33 offset:552 ; DAGISEL64-NEXT: scratch_load_b32 v242, off, s33 offset:556 ; DAGISEL64-NEXT: scratch_load_b32 v243, off, s33 offset:560 ; DAGISEL64-NEXT: scratch_load_b32 v244, off, s33 offset:564 ; DAGISEL64-NEXT: scratch_load_b32 v245, off, s33 offset:568 ; DAGISEL64-NEXT: scratch_load_b32 v246, off, s33 offset:572 ; DAGISEL64-NEXT: scratch_load_b32 v247, off, s33 offset:576 ; DAGISEL64-NEXT: s_mov_b64 exec, s[4:5] ; DAGISEL64-NEXT: s_mov_b32 s33, s0 ; DAGISEL64-NEXT: s_wait_loadcnt 0x0 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_setpc_b64 s[30:31] ; ; GISEL64-LABEL: call_gfx_from_whole_wave: ; GISEL64: ; %bb.0: ; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 ; GISEL64-NEXT: s_wait_expcnt 0x0 ; GISEL64-NEXT: s_wait_samplecnt 0x0 ; GISEL64-NEXT: s_wait_bvhcnt 0x0 ; GISEL64-NEXT: s_wait_kmcnt 0x0 ; GISEL64-NEXT: s_mov_b32 s0, s33 ; GISEL64-NEXT: s_mov_b32 s33, s32 ; GISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 ; GISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 ; GISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 ; GISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 ; GISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 ; GISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 ; GISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 ; GISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 ; GISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 ; GISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 ; GISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 ; GISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 ; GISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 ; GISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 ; 
GISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 ; GISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 ; GISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 ; GISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 ; GISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 ; GISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 ; GISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 ; GISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 ; GISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 ; GISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 ; GISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 ; GISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 ; GISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 ; GISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 ; GISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 ; GISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 ; GISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 ; GISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 ; GISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 ; GISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 ; GISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 ; GISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 ; GISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 ; GISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 ; GISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 ; GISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:164 ; GISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:168 ; GISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:172 ; GISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:176 ; GISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:180 ; GISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:184 ; GISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:188 ; GISEL64-NEXT: scratch_store_b32 
off, v55, s33 offset:192 ; GISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:196 ; GISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:200 ; GISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:204 ; GISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:208 ; GISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:212 ; GISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:216 ; GISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:220 ; GISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:224 ; GISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:228 ; GISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:232 ; GISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:236 ; GISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:240 ; GISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:244 ; GISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:248 ; GISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:252 ; GISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:260 ; GISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:264 ; GISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:268 ; GISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:272 ; GISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:276 ; GISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:280 ; GISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:284 ; GISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:288 ; GISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:292 ; GISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:296 ; GISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:300 ; GISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:304 ; GISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:308 ; GISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:312 ; GISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:316 ; GISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:320 ; GISEL64-NEXT: scratch_store_b32 off, v128, 
s33 offset:324 ; GISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:328 ; GISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:332 ; GISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:336 ; GISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:340 ; GISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:344 ; GISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:348 ; GISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:352 ; GISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:356 ; GISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:360 ; GISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:364 ; GISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:368 ; GISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:372 ; GISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:376 ; GISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:380 ; GISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:388 ; GISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:392 ; GISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:396 ; GISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:400 ; GISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:404 ; GISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:408 ; GISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:412 ; GISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:416 ; GISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:420 ; GISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:424 ; GISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:428 ; GISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:432 ; GISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:436 ; GISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:440 ; GISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:444 ; GISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:448 ; GISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:452 ; GISEL64-NEXT: scratch_store_b32 
off, v193, s33 offset:456 ; GISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:460 ; GISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:464 ; GISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:468 ; GISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:472 ; GISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:476 ; GISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:480 ; GISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:484 ; GISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:488 ; GISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:492 ; GISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:496 ; GISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:500 ; GISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:504 ; GISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:508 ; GISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; GISEL64-NEXT: s_clause 0xf ; GISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:516 ; GISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:520 ; GISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:524 ; GISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:528 ; GISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:532 ; GISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:536 ; GISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:540 ; GISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:544 ; GISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:548 ; GISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:552 ; GISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:556 ; GISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:560 ; GISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:564 ; GISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:568 ; GISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:572 ; GISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GISEL64-NEXT: s_wait_alu 0xfffe ; 
GISEL64-NEXT: v_writelane_b32 v40, s0, 4 ; GISEL64-NEXT: v_mov_b32_e32 v2, v0 ; GISEL64-NEXT: v_swap_b32 v0, v1 ; GISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo ; GISEL64-NEXT: v_writelane_b32 v40, s4, 0 ; GISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi ; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 ; GISEL64-NEXT: v_writelane_b32 v40, s5, 1 ; GISEL64-NEXT: v_writelane_b32 v40, s30, 2 ; GISEL64-NEXT: v_writelane_b32 v40, s31, 3 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; GISEL64-NEXT: v_readlane_b32 s30, v40, 2 ; GISEL64-NEXT: v_readlane_b32 s5, v40, 1 ; GISEL64-NEXT: v_readlane_b32 s4, v40, 0 ; GISEL64-NEXT: v_readlane_b32 s0, v40, 4 ; GISEL64-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload ; GISEL64-NEXT: s_mov_b32 s32, s33 ; GISEL64-NEXT: s_xor_b64 exec, s[4:5], -1 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_load_b32 v0, off, s33 offset:4 ; GISEL64-NEXT: scratch_load_b32 v1, off, s33 offset:8 ; GISEL64-NEXT: scratch_load_b32 v2, off, s33 offset:12 ; GISEL64-NEXT: scratch_load_b32 v3, off, s33 offset:16 ; GISEL64-NEXT: scratch_load_b32 v4, off, s33 offset:20 ; GISEL64-NEXT: scratch_load_b32 v5, off, s33 offset:24 ; GISEL64-NEXT: scratch_load_b32 v6, off, s33 offset:28 ; GISEL64-NEXT: scratch_load_b32 v7, off, s33 offset:32 ; GISEL64-NEXT: scratch_load_b32 v8, off, s33 offset:36 ; GISEL64-NEXT: scratch_load_b32 v9, off, s33 offset:40 ; GISEL64-NEXT: scratch_load_b32 v10, off, s33 offset:44 ; GISEL64-NEXT: scratch_load_b32 v11, off, s33 offset:48 ; GISEL64-NEXT: scratch_load_b32 v12, off, s33 offset:52 ; GISEL64-NEXT: scratch_load_b32 v13, off, s33 offset:56 ; GISEL64-NEXT: scratch_load_b32 v14, off, s33 offset:60 ; GISEL64-NEXT: scratch_load_b32 v15, off, s33 offset:64 ; GISEL64-NEXT: scratch_load_b32 v16, off, s33 offset:68 ; GISEL64-NEXT: scratch_load_b32 v17, off, s33 offset:72 ; GISEL64-NEXT: scratch_load_b32 v18, 
off, s33 offset:76 ; GISEL64-NEXT: scratch_load_b32 v19, off, s33 offset:80 ; GISEL64-NEXT: scratch_load_b32 v20, off, s33 offset:84 ; GISEL64-NEXT: scratch_load_b32 v21, off, s33 offset:88 ; GISEL64-NEXT: scratch_load_b32 v22, off, s33 offset:92 ; GISEL64-NEXT: scratch_load_b32 v23, off, s33 offset:96 ; GISEL64-NEXT: scratch_load_b32 v24, off, s33 offset:100 ; GISEL64-NEXT: scratch_load_b32 v25, off, s33 offset:104 ; GISEL64-NEXT: scratch_load_b32 v26, off, s33 offset:108 ; GISEL64-NEXT: scratch_load_b32 v27, off, s33 offset:112 ; GISEL64-NEXT: scratch_load_b32 v28, off, s33 offset:116 ; GISEL64-NEXT: scratch_load_b32 v29, off, s33 offset:120 ; GISEL64-NEXT: scratch_load_b32 v30, off, s33 offset:124 ; GISEL64-NEXT: scratch_load_b32 v31, off, s33 offset:128 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_load_b32 v32, off, s33 offset:132 ; GISEL64-NEXT: scratch_load_b32 v33, off, s33 offset:136 ; GISEL64-NEXT: scratch_load_b32 v34, off, s33 offset:140 ; GISEL64-NEXT: scratch_load_b32 v35, off, s33 offset:144 ; GISEL64-NEXT: scratch_load_b32 v36, off, s33 offset:148 ; GISEL64-NEXT: scratch_load_b32 v37, off, s33 offset:152 ; GISEL64-NEXT: scratch_load_b32 v38, off, s33 offset:156 ; GISEL64-NEXT: scratch_load_b32 v39, off, s33 offset:160 ; GISEL64-NEXT: scratch_load_b32 v48, off, s33 offset:164 ; GISEL64-NEXT: scratch_load_b32 v49, off, s33 offset:168 ; GISEL64-NEXT: scratch_load_b32 v50, off, s33 offset:172 ; GISEL64-NEXT: scratch_load_b32 v51, off, s33 offset:176 ; GISEL64-NEXT: scratch_load_b32 v52, off, s33 offset:180 ; GISEL64-NEXT: scratch_load_b32 v53, off, s33 offset:184 ; GISEL64-NEXT: scratch_load_b32 v54, off, s33 offset:188 ; GISEL64-NEXT: scratch_load_b32 v55, off, s33 offset:192 ; GISEL64-NEXT: scratch_load_b32 v64, off, s33 offset:196 ; GISEL64-NEXT: scratch_load_b32 v65, off, s33 offset:200 ; GISEL64-NEXT: scratch_load_b32 v66, off, s33 offset:204 ; GISEL64-NEXT: scratch_load_b32 v67, off, s33 offset:208 ; GISEL64-NEXT: scratch_load_b32 v68, 
off, s33 offset:212 ; GISEL64-NEXT: scratch_load_b32 v69, off, s33 offset:216 ; GISEL64-NEXT: scratch_load_b32 v70, off, s33 offset:220 ; GISEL64-NEXT: scratch_load_b32 v71, off, s33 offset:224 ; GISEL64-NEXT: scratch_load_b32 v80, off, s33 offset:228 ; GISEL64-NEXT: scratch_load_b32 v81, off, s33 offset:232 ; GISEL64-NEXT: scratch_load_b32 v82, off, s33 offset:236 ; GISEL64-NEXT: scratch_load_b32 v83, off, s33 offset:240 ; GISEL64-NEXT: scratch_load_b32 v84, off, s33 offset:244 ; GISEL64-NEXT: scratch_load_b32 v85, off, s33 offset:248 ; GISEL64-NEXT: scratch_load_b32 v86, off, s33 offset:252 ; GISEL64-NEXT: scratch_load_b32 v87, off, s33 offset:256 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_load_b32 v96, off, s33 offset:260 ; GISEL64-NEXT: scratch_load_b32 v97, off, s33 offset:264 ; GISEL64-NEXT: scratch_load_b32 v98, off, s33 offset:268 ; GISEL64-NEXT: scratch_load_b32 v99, off, s33 offset:272 ; GISEL64-NEXT: scratch_load_b32 v100, off, s33 offset:276 ; GISEL64-NEXT: scratch_load_b32 v101, off, s33 offset:280 ; GISEL64-NEXT: scratch_load_b32 v102, off, s33 offset:284 ; GISEL64-NEXT: scratch_load_b32 v103, off, s33 offset:288 ; GISEL64-NEXT: scratch_load_b32 v112, off, s33 offset:292 ; GISEL64-NEXT: scratch_load_b32 v113, off, s33 offset:296 ; GISEL64-NEXT: scratch_load_b32 v114, off, s33 offset:300 ; GISEL64-NEXT: scratch_load_b32 v115, off, s33 offset:304 ; GISEL64-NEXT: scratch_load_b32 v116, off, s33 offset:308 ; GISEL64-NEXT: scratch_load_b32 v117, off, s33 offset:312 ; GISEL64-NEXT: scratch_load_b32 v118, off, s33 offset:316 ; GISEL64-NEXT: scratch_load_b32 v119, off, s33 offset:320 ; GISEL64-NEXT: scratch_load_b32 v128, off, s33 offset:324 ; GISEL64-NEXT: scratch_load_b32 v129, off, s33 offset:328 ; GISEL64-NEXT: scratch_load_b32 v130, off, s33 offset:332 ; GISEL64-NEXT: scratch_load_b32 v131, off, s33 offset:336 ; GISEL64-NEXT: scratch_load_b32 v132, off, s33 offset:340 ; GISEL64-NEXT: scratch_load_b32 v133, off, s33 offset:344 ; GISEL64-NEXT: 
scratch_load_b32 v134, off, s33 offset:348 ; GISEL64-NEXT: scratch_load_b32 v135, off, s33 offset:352 ; GISEL64-NEXT: scratch_load_b32 v144, off, s33 offset:356 ; GISEL64-NEXT: scratch_load_b32 v145, off, s33 offset:360 ; GISEL64-NEXT: scratch_load_b32 v146, off, s33 offset:364 ; GISEL64-NEXT: scratch_load_b32 v147, off, s33 offset:368 ; GISEL64-NEXT: scratch_load_b32 v148, off, s33 offset:372 ; GISEL64-NEXT: scratch_load_b32 v149, off, s33 offset:376 ; GISEL64-NEXT: scratch_load_b32 v150, off, s33 offset:380 ; GISEL64-NEXT: scratch_load_b32 v151, off, s33 offset:384 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_load_b32 v160, off, s33 offset:388 ; GISEL64-NEXT: scratch_load_b32 v161, off, s33 offset:392 ; GISEL64-NEXT: scratch_load_b32 v162, off, s33 offset:396 ; GISEL64-NEXT: scratch_load_b32 v163, off, s33 offset:400 ; GISEL64-NEXT: scratch_load_b32 v164, off, s33 offset:404 ; GISEL64-NEXT: scratch_load_b32 v165, off, s33 offset:408 ; GISEL64-NEXT: scratch_load_b32 v166, off, s33 offset:412 ; GISEL64-NEXT: scratch_load_b32 v167, off, s33 offset:416 ; GISEL64-NEXT: scratch_load_b32 v176, off, s33 offset:420 ; GISEL64-NEXT: scratch_load_b32 v177, off, s33 offset:424 ; GISEL64-NEXT: scratch_load_b32 v178, off, s33 offset:428 ; GISEL64-NEXT: scratch_load_b32 v179, off, s33 offset:432 ; GISEL64-NEXT: scratch_load_b32 v180, off, s33 offset:436 ; GISEL64-NEXT: scratch_load_b32 v181, off, s33 offset:440 ; GISEL64-NEXT: scratch_load_b32 v182, off, s33 offset:444 ; GISEL64-NEXT: scratch_load_b32 v183, off, s33 offset:448 ; GISEL64-NEXT: scratch_load_b32 v192, off, s33 offset:452 ; GISEL64-NEXT: scratch_load_b32 v193, off, s33 offset:456 ; GISEL64-NEXT: scratch_load_b32 v194, off, s33 offset:460 ; GISEL64-NEXT: scratch_load_b32 v195, off, s33 offset:464 ; GISEL64-NEXT: scratch_load_b32 v196, off, s33 offset:468 ; GISEL64-NEXT: scratch_load_b32 v197, off, s33 offset:472 ; GISEL64-NEXT: scratch_load_b32 v198, off, s33 offset:476 ; GISEL64-NEXT: scratch_load_b32 v199, 
off, s33 offset:480 ; GISEL64-NEXT: scratch_load_b32 v208, off, s33 offset:484 ; GISEL64-NEXT: scratch_load_b32 v209, off, s33 offset:488 ; GISEL64-NEXT: scratch_load_b32 v210, off, s33 offset:492 ; GISEL64-NEXT: scratch_load_b32 v211, off, s33 offset:496 ; GISEL64-NEXT: scratch_load_b32 v212, off, s33 offset:500 ; GISEL64-NEXT: scratch_load_b32 v213, off, s33 offset:504 ; GISEL64-NEXT: scratch_load_b32 v214, off, s33 offset:508 ; GISEL64-NEXT: scratch_load_b32 v215, off, s33 offset:512 ; GISEL64-NEXT: s_clause 0xf ; GISEL64-NEXT: scratch_load_b32 v224, off, s33 offset:516 ; GISEL64-NEXT: scratch_load_b32 v225, off, s33 offset:520 ; GISEL64-NEXT: scratch_load_b32 v226, off, s33 offset:524 ; GISEL64-NEXT: scratch_load_b32 v227, off, s33 offset:528 ; GISEL64-NEXT: scratch_load_b32 v228, off, s33 offset:532 ; GISEL64-NEXT: scratch_load_b32 v229, off, s33 offset:536 ; GISEL64-NEXT: scratch_load_b32 v230, off, s33 offset:540 ; GISEL64-NEXT: scratch_load_b32 v231, off, s33 offset:544 ; GISEL64-NEXT: scratch_load_b32 v240, off, s33 offset:548 ; GISEL64-NEXT: scratch_load_b32 v241, off, s33 offset:552 ; GISEL64-NEXT: scratch_load_b32 v242, off, s33 offset:556 ; GISEL64-NEXT: scratch_load_b32 v243, off, s33 offset:560 ; GISEL64-NEXT: scratch_load_b32 v244, off, s33 offset:564 ; GISEL64-NEXT: scratch_load_b32 v245, off, s33 offset:568 ; GISEL64-NEXT: scratch_load_b32 v246, off, s33 offset:572 ; GISEL64-NEXT: scratch_load_b32 v247, off, s33 offset:576 ; GISEL64-NEXT: s_mov_b64 exec, s[4:5] ; GISEL64-NEXT: s_mov_b32 s33, s0 ; GISEL64-NEXT: s_wait_loadcnt 0x0 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: s_setpc_b64 s[30:31] %ret = call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent ret <2 x half> %ret }