; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_1 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s

; We aren't pressuring the SGPRs, so this can use the add with carry out pre-gfx9.
;
; %alloca0 (4096 x i32, align 64) is only consumed through a VGPR operand ("v").
; %alloca1 is pinned to s55 by the "{s55}" constraint while the same inline asm
; also takes i32 0 in scc ("{scc}"). In the expected output below, the address
; placed in s55 is formed either by a VALU add followed by v_readfirstlane_b32,
; or by s_addc_u32 (s_add_co_ci_u32 on gfx1200) followed by
; s_bitcmp1_b32 / s_bitset0_b32.
define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use alloca0 v0
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
; GFX10_1-NEXT: v_readfirstlane_b32 s55, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s55, scc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use alloca0 v0
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
; GFX10_3-NEXT: v_readfirstlane_b32 s55, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s55, scc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_add_i32 s0, s32, 64
; GFX11-NEXT: v_writelane_b32 v1, s55, 0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: s_addc_u32 s0, s32, 0x4040
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use alloca0 v0
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s55, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v1, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
; GFX12-NEXT: v_writelane_b32 v1, s55, 0
; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_bitcmp1_b32 s0, 0
; GFX12-NEXT: s_bitset0_b32 s0, 0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 s55, s0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s55, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: v_readlane_b32 s55, v1, 0
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: v_writelane_b32 v1, s55, 0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: s_movk_i32 s55, 0x4040
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0
; GFX8-NEXT: v_readfirstlane_b32 s55, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
; GFX900-NEXT: v_writelane_b32 v1, s55, 0
; GFX900-NEXT: v_readfirstlane_b32 s55, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s32, 0x4040
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
; GFX942-NEXT: v_writelane_b32 v1, s55, 0
; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v1, 0
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
  %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
  %alloca1 = alloca i32, align 4, addrspace(5)
  call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
  call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0)
  ret void
}

; %alloca1 should end up materializing with s_mov_b32, and scc is
; available.
; Same two stack objects as scalar_mov_materializes_frame_index_unavailable_scc,
; but the second inline asm takes only "{s55}" (no scc operand). The expected
; output forms the s55 address with plain scalar adds
; (s_add_i32 / s_addk_i32 / s_add_co_i32).
define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5
; GFX10_1-NEXT: s_addk_i32 s55, 0x4040
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use alloca0 v0
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s55
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5
; GFX10_3-NEXT: s_addk_i32 s55, 0x4040
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use alloca0 v0
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s55
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v1, s55, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_add_i32 s0, s32, 0x4040
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use alloca0 v0
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s55
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v1, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: v_writelane_b32 v1, s55, 0
; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 s55, s0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s55
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: v_readlane_b32 s55, v1, 0
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_writelane_b32 v1, s55, 0
; GFX8-NEXT: s_lshr_b32 s55, s32, 6
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: s_addk_i32 s55, 0x4040
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s55
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v1, s55, 0
; GFX900-NEXT: s_lshr_b32 s55, s32, 6
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: s_addk_i32 s55, 0x4040
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s55
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_add_i32 s0, s32, 0x4040
; GFX942-NEXT: v_writelane_b32 v1, s55, 0
; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s55
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v1, 0
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
  %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
  %alloca1 = alloca i32, align 4, addrspace(5)
  call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
  call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca1)
  ret void
}

; The IR body is identical to scalar_mov_materializes_frame_index_unavailable_scc,
; but the function uses attribute group #1 instead of #0. The expected output
; below saves s33, copies s32 into it, and addresses the stack objects relative
; to s33 instead of s32.
; NOTE(review): #1 is defined outside this view; presumably it forces a frame
; pointer -- confirm against the attribute definitions in the file.
define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_mov_b32 s5, s33
; GFX10_1-NEXT: s_mov_b32 s33, s32
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880
; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: s_mov_b32 s32, s33
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use alloca0 v0
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
; GFX10_1-NEXT: v_readfirstlane_b32 s55, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s55, scc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880
; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: s_mov_b32 s33, s5
; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_mov_b32 s5, s33
; GFX10_3-NEXT: s_mov_b32 s33, s32
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880
; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0
; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: s_mov_b32 s32, s33
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use alloca0 v0
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
; GFX10_3-NEXT: v_readfirstlane_b32 s55, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s55, scc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880
; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_mov_b32 s33, s5
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s1, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: s_add_i32 s2, s33, 0x4044
; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_addk_i32 s32, 0x4080
; GFX11-NEXT: s_add_i32 s0, s33, 64
; GFX11-NEXT: v_writelane_b32 v1, s55, 0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use alloca0 v0
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s55, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v1, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: s_add_i32 s2, s33, 0x4044
; GFX11-NEXT: scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_mov_b32 s33, s1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s1, s33
; GFX12-NEXT: s_mov_b32 s33, s32
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_store_b32 off, v1, s33 offset:16388 ; 4-byte Folded Spill
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_addk_co_i32 s32, 0x4040
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
; GFX12-NEXT: v_writelane_b32 v1, s55, 0
; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000
; GFX12-NEXT: v_mov_b32_e32 v0, s33
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_bitcmp1_b32 s0, 0
; GFX12-NEXT: s_bitset0_b32 s0, 0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 s55, s0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s55, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: v_readlane_b32 s55, v1, 0
; GFX12-NEXT: s_mov_b32 s32, s33
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_load_b32 v1, off, s33 offset:16388 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_mov_b32 s33, s1
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s6, s33
; GFX8-NEXT: s_mov_b32 s33, s32
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s7, s33, 0x101100
; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: v_writelane_b32 v1, s55, 0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX8-NEXT: s_movk_i32 s55, 0x4040
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0
; GFX8-NEXT: s_add_i32 s32, s32, 0x102000
; GFX8-NEXT: v_readfirstlane_b32 s55, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_mov_b32 s32, s33
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s7, s33, 0x101100
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_mov_b32 s33, s6
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s6, s33
; GFX900-NEXT: s_mov_b32 s33, s32
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s7, s33, 0x101100
; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
; GFX900-NEXT: v_writelane_b32 v1, s55, 0
; GFX900-NEXT: v_readfirstlane_b32 s55, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_mov_b32 s32, s33
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s7, s33, 0x101100
; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_mov_b32 s33, s6
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, s33
; GFX942-NEXT: s_mov_b32 s33, s32
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT: s_add_i32 s3, s33, 0x4044
; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_addk_i32 s32, 0x4080
; GFX942-NEXT: s_add_i32 s0, s33, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
; GFX942-NEXT: v_writelane_b32 v1, s55, 0
; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v1, 0
; GFX942-NEXT: s_mov_b32 s32, s33
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT: s_add_i32 s3, s33, 0x4044
; GFX942-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_mov_b32 s33, s2
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
  %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
  %alloca1 = alloca i32, align 4, addrspace(5)
  call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
  call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0)
  ret void
}

; Only one stack object here: %alloca0 itself is pinned to s55 while scc is
; also an inline asm input ("{s55},{scc}"), so the offset being materialized is
; small (64 in the expected output; on gfx1200 s55 is a plain copy of s32).
define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() #0 {
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32
; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1
; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s55, scc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32
; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1
; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s55, scc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: v_writelane_b32 v0, s55, 0
; GFX11-NEXT: s_addc_u32 s0, s32, 64
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b32 s55, s0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s55, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: v_writelane_b32 v0, s55, 0
; GFX12-NEXT: s_mov_b32 s55, s32
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s55, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_readlane_b32 s55, v0, 0
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_writelane_b32 v0, s55, 0
; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32
; GFX8-NEXT: s_mov_b32 s55, 64
; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1
; GFX8-NEXT: v_readfirstlane_b32 s55, v1
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s55, scc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v0, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32
; GFX900-NEXT: v_add_u32_e32 v1, 64, v1
; GFX900-NEXT: v_writelane_b32 v0, s55, 0
; GFX900-NEXT: v_readfirstlane_b32 s55, v1
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s55, scc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s32, 64
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
; GFX942-NEXT: v_writelane_b32 v0, s55, 0
; GFX942-NEXT: s_mov_b32 s55, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s55, scc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v0, 0
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
  %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
  call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca0, i32 0)
  ret void
}

define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 {
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0
; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5
; GFX10_1-NEXT: s_add_i32 s55, s55, 64
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s55
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0
; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5
; GFX10_3-NEXT: s_add_i32 s55, s55, 64
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s55
; GFX10_3-NEXT: ;;#ASMEND
;
GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 ; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 ; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s55, v0, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 ; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_mov_b32 s55, s32 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 
4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_writelane_b32 v0, s55, 0 ; GFX8-NEXT: s_lshr_b32 s55, s32, 6 ; GFX8-NEXT: s_add_i32 s55, s55, 64 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s55 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_readlane_b32 s55, v0, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s55, 0 ; GFX900-NEXT: s_lshr_b32 s55, s32, 6 ; GFX900-NEXT: s_add_i32 s55, s55, 64 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s55 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s55, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 ; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55 ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 ; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca0) ret void } define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp() #1 { ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_mov_b32 s5, s33 ; GFX10_1-NEXT: s_mov_b32 s33, s32 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 ; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 ; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX10_1-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: s_mov_b32 s33, s5 ; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_mov_b32 s5, s33 ; GFX10_3-NEXT: s_mov_b32 s33, s32 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 ; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 ; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: s_mov_b32 s33, s5 ; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s1, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 ; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_addc_u32 s0, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 ; 
GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55, scc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s55, v0, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 ; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_mov_b32 s33, s1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s1, s33 ; GFX12-NEXT: s_mov_b32 s33, s32 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 ; GFX12-NEXT: s_mov_b32 s55, s33 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 ; GFX12-NEXT: s_mov_b32 s32, s33 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_mov_b32 s33, s1 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 s6, s33 ; GFX8-NEXT: s_mov_b32 s33, s32 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded 
Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_writelane_b32 v0, s55, 0 ; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX8-NEXT: s_mov_b32 s55, 64 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1 ; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX8-NEXT: v_readfirstlane_b32 s55, v1 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s55, scc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_readlane_b32 s55, v0, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_mov_b32 s33, s6 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s6, s33 ; GFX900-NEXT: s_mov_b32 s33, s32 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 ; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX900-NEXT: v_writelane_b32 v0, s55, 0 ; GFX900-NEXT: v_readfirstlane_b32 s55, v1 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s55, scc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s55, v0, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_mov_b32 s33, s6 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: ; 
GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s2, s33 ; GFX942-NEXT: s_mov_b32 s33, s32 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 ; GFX942-NEXT: scratch_store_dword off, v0, s3 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s33, 64 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55, scc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v0, 0 ; GFX942-NEXT: s_mov_b32 s32, s33 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 ; GFX942-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_mov_b32 s33, s2 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca0, i32 0) ret void } define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() #1 { ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_mov_b32 s4, s33 ; GFX10_1-NEXT: s_mov_b32 s33, s32 ; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 ; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 ; GFX10_1-NEXT: s_lshr_b32 s55, s33, 5 ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: s_add_i32 s55, s55, 64 ; GFX10_1-NEXT: ;;#ASMSTART ; 
GFX10_1-NEXT: ; use s55 ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 ; GFX10_1-NEXT: s_mov_b32 s33, s4 ; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_mov_b32 s4, s33 ; GFX10_3-NEXT: s_mov_b32 s33, s32 ; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 ; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 ; GFX10_3-NEXT: s_lshr_b32 s55, s33, 5 ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: s_add_i32 s55, s55, 64 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s55 ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 ; GFX10_3-NEXT: s_mov_b32 s33, s4 ; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 ; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 ; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 ; GFX11-NEXT: 
s_add_i32 s1, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_mov_b32 s55, s1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s55, v0, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 ; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s0, s33 ; GFX12-NEXT: s_mov_b32 s33, s32 ; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 ; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s1 ; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 ; GFX12-NEXT: s_mov_b32 s55, s33 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_mov_b32 s32, s33 ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 ; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s1 ; GFX12-NEXT: s_mov_b32 s33, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 s4, s33 ; GFX8-NEXT: s_mov_b32 s33, s32 ; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[6:7] ; 
GFX8-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX8-NEXT: v_writelane_b32 v0, s55, 0 ; GFX8-NEXT: s_lshr_b32 s55, s33, 6 ; GFX8-NEXT: s_add_i32 s55, s55, 64 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s55 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_readlane_b32 s55, v0, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[6:7] ; GFX8-NEXT: s_mov_b32 s33, s4 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, s33 ; GFX900-NEXT: s_mov_b32 s33, s32 ; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[6:7] ; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX900-NEXT: v_writelane_b32 v0, s55, 0 ; GFX900-NEXT: s_lshr_b32 s55, s33, 6 ; GFX900-NEXT: s_add_i32 s55, s55, 64 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s55 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s55, v0, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[6:7] ; GFX900-NEXT: s_mov_b32 s33, s4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s0, s33 ; GFX942-NEXT: s_mov_b32 s33, s32 ; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 ; GFX942-NEXT: scratch_store_dword off, v0, s1 ; 4-byte 
Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 ; GFX942-NEXT: s_add_i32 s1, s33, 64 ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55 ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v0, 0 ; GFX942-NEXT: s_mov_b32 s32, s33 ; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 ; GFX942-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: s_mov_b32 s33, s0 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca0) ret void } define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset() #0 { ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_1-NEXT: s_add_i32 s55, s4, 0x442c ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: 
s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_3-NEXT: s_add_i32 s55, s4, 0x442c ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: s_add_i32 s55, s32, 0x442c ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55, scc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s55, v1, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX11-NEXT: 
scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 ; GFX12-NEXT: s_add_co_i32 s55, s32, 0x43ec ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: v_readlane_b32 s55, v1, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_lshr_b32 s4, s32, 6 ; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: s_add_i32 s55, s4, 0x442c ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s55, scc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_readlane_b32 
s55, v1, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_lshr_b32 s4, s32, 6 ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: s_add_i32 s55, s4, 0x442c ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s55, scc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s55, v1, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_add_i32 s55, s32, 0x442c ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55, scc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v1, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca [4096 x i32], align 4, addrspace(5) %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 251 call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) ret void } define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset(i32 inreg %soffset) #0 { ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshl_b32 s4, s16, 2 ; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5 ; GFX10_1-NEXT: s_add_i32 s55, s55, s4 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: s_addk_i32 s55, 0x4040 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX10_1-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshl_b32 s4, s16, 2 ; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5 ; GFX10_3-NEXT: s_add_i32 s55, s55, s4 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: s_addk_i32 s55, 0x4040 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 ; GFX11-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s1, s32, 64 ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-NEXT: s_add_i32 s55, s32, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_addk_i32 s55, 0x4040 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; 
GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55, scc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s55, v1, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 ; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s1 ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_add_co_i32 s55, s32, s0 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_addk_co_i32 s55, 0x4000 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: v_readlane_b32 s55, v1, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: s_lshl_b32 s4, 
s16, 2 ; GFX8-NEXT: s_lshr_b32 s55, s32, 6 ; GFX8-NEXT: s_add_i32 s55, s55, s4 ; GFX8-NEXT: s_addk_i32 s55, 0x4040 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s55, scc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_readlane_b32 s55, v1, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: s_lshl_b32 s4, s16, 2 ; GFX900-NEXT: s_lshr_b32 s55, s32, 6 ; GFX900-NEXT: s_add_i32 s55, s55, s4 ; GFX900-NEXT: s_addk_i32 s55, 0x4040 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s55, scc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s55, v1, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; GFX942-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: s_lshl_b32 s0, s0, 2 ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_add_i32 s55, s32, s0 ; GFX942-NEXT: s_addk_i32 s55, 0x4040 ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55, scc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v1, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca [4096 x i32], align 4, addrspace(5) %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 %soffset call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) ret void } attributes #0 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" } attributes #1 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" "frame-pointer"="all" } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX9: {{.*}}