; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10_1 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10_3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s

; Aggregate result type of the register-defining ("; def ...") inline asm in
; @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs. There is one
; field per asm output constraint, in constraint order.
%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
                     <16 x i32>, <7 x i32>, ; vgprs
                     i64 ; vcc
                   }

; Same field layout as %asm.output, except the second vgpr field is
; <5 x i32> instead of <7 x i32>.
%asm.output2 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
                     <16 x i32>, <5 x i32>, ; vgprs
                     i64 ; vcc
                   }

; Same as %asm.output2, except there is no trailing lone i32 sgpr field and
; the second vgpr field is <6 x i32>.
%asm.output3 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, ; sgprs
                     <16 x i32>, <6 x i32>, ; vgprs
                     i64 ; vcc
                   }

; %alloca1 should end up materializing with s_mov_b32, but scc is
; unavailable.
;
; This is primarily to test gfx7 and gfx8, which do not have vector
; add with no carry.
;
; The "def" inline asm in the body writes s0-s58, v0-v22 and vcc, and the
; final "use" inline asm reads all of them again together with scc. The
; address of %alloca1 is requested in s54 for that final asm, so it has to be
; computed while every one of those registers is still occupied. Note that
; s54 also lies inside the s[48:55] operand of the same asm.
define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 {
; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX7-NEXT:    s_add_i32 s6, s32, 0x101100
; GFX7-NEXT:    buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX7-NEXT:    s_mov_b64 exec, s[4:5]
; GFX7-NEXT:    v_writelane_b32 v23, s30, 0
; GFX7-NEXT:    v_writelane_b32 v23, s31, 1
; GFX7-NEXT:    v_writelane_b32 v23, s33, 2
; GFX7-NEXT:    v_writelane_b32 v23, s34, 3
; GFX7-NEXT:    v_writelane_b32 v23, s35, 4
; GFX7-NEXT:    v_writelane_b32 v23, s36, 5
; GFX7-NEXT:    v_writelane_b32 v23, s37, 6
; GFX7-NEXT:    v_writelane_b32 v23, s38, 7
; GFX7-NEXT:    v_writelane_b32 v23, s39, 8
; GFX7-NEXT:    v_writelane_b32 v23, s48, 9
; GFX7-NEXT:    v_writelane_b32 v23, s49, 10
; GFX7-NEXT:    v_writelane_b32 v23, s50, 11
; GFX7-NEXT:    v_writelane_b32 v23, s51, 12
; GFX7-NEXT:    v_writelane_b32 v23, s52, 13
; GFX7-NEXT:    v_writelane_b32 v23, s53, 14
; GFX7-NEXT:    v_lshr_b32_e64 v0, s32, 6
; GFX7-NEXT:    v_writelane_b32 v23, s54, 15
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 64, v0
; GFX7-NEXT:    s_and_b64 s[4:5], 0, exec
; GFX7-NEXT:    v_writelane_b32 v23, s55, 16
; GFX7-NEXT:    ;;#ASMSTART
; GFX7-NEXT:    ; use alloca0 v0
; GFX7-NEXT:    ;;#ASMEND
; GFX7-NEXT:    ;;#ASMSTART
; GFX7-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX7-NEXT:    ;;#ASMEND
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], s32
; GFX7-NEXT:    v_mov_b32_e32 v0, 0x4040
; GFX7-NEXT:    v_mad_u32_u24 v0, v0, 64, s32
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 6, v0
; GFX7-NEXT:    v_readfirstlane_b32 s54, v0
; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], s32
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ;;#ASMSTART
; GFX7-NEXT:    ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX7-NEXT:    ;;#ASMEND
; GFX7-NEXT:    v_readlane_b32 s55, v23, 16
; GFX7-NEXT:    v_readlane_b32 s54, v23, 15
; GFX7-NEXT:    v_readlane_b32 s53, v23, 14
; GFX7-NEXT:    v_readlane_b32 s52, v23, 13
; GFX7-NEXT:    v_readlane_b32 s51, v23, 12
; GFX7-NEXT:    v_readlane_b32 s50, v23, 11
; GFX7-NEXT:    v_readlane_b32 s49, v23, 10
; GFX7-NEXT:    v_readlane_b32 s48, v23, 9
; GFX7-NEXT:    v_readlane_b32 s39, v23, 8
; GFX7-NEXT:    v_readlane_b32 s38, v23, 7
; GFX7-NEXT:    v_readlane_b32 s37, v23, 6
; GFX7-NEXT:    v_readlane_b32 s36, v23, 5
; GFX7-NEXT:    v_readlane_b32 s35, v23, 4
; GFX7-NEXT:    v_readlane_b32 s34, v23, 3
; GFX7-NEXT:    v_readlane_b32 s33, v23, 2
; GFX7-NEXT:    v_readlane_b32 s31, v23, 1
; GFX7-NEXT:    v_readlane_b32 s30, v23, 0
; GFX7-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX7-NEXT:    s_add_i32 s6, s32, 0x101100
; GFX7-NEXT:    buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX7-NEXT:    s_mov_b64 exec, s[4:5]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT:    s_add_i32 s6, s32, 0x101100
; GFX8-NEXT:    buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
; GFX8-NEXT:    v_writelane_b32 v23, s30, 0
; GFX8-NEXT:    v_writelane_b32 v23, s31, 1
; GFX8-NEXT:    v_writelane_b32 v23, s33, 2
; GFX8-NEXT:    v_writelane_b32 v23, s34, 3
; GFX8-NEXT:    v_writelane_b32 v23, s35, 4
; GFX8-NEXT:    v_writelane_b32 v23, s36, 5
; GFX8-NEXT:    v_writelane_b32 v23, s37, 6
; GFX8-NEXT:    v_writelane_b32 v23, s38, 7
; GFX8-NEXT:    v_writelane_b32 v23, s39, 8
; GFX8-NEXT:    v_writelane_b32 v23, s48, 9
; GFX8-NEXT:    v_writelane_b32 v23, s49, 10
; GFX8-NEXT:    v_writelane_b32 v23, s50, 11
; GFX8-NEXT:    v_writelane_b32 v23, s51, 12
; GFX8-NEXT:    v_writelane_b32 v23, s52, 13
; GFX8-NEXT:    v_writelane_b32 v23, s53, 14
; GFX8-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT:    v_writelane_b32 v23, s54, 15
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT:    s_and_b64 s[4:5], 0, exec
; GFX8-NEXT:    v_writelane_b32 v23, s55, 16
; GFX8-NEXT:    ;;#ASMSTART
; GFX8-NEXT:    ; use alloca0 v0
; GFX8-NEXT:    ;;#ASMEND
; GFX8-NEXT:    ;;#ASMSTART
; GFX8-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX8-NEXT:    ;;#ASMEND
; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], s32
; GFX8-NEXT:    v_mov_b32_e32 v0, 0x4040
; GFX8-NEXT:    v_mad_u32_u24 v0, v0, 64, s32
; GFX8-NEXT:    ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 6, v0
; GFX8-NEXT:    v_readfirstlane_b32 s54, v0
; GFX8-NEXT:    buffer_load_dword v0, off, s[0:3], s32
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ;;#ASMSTART
; GFX8-NEXT:    ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX8-NEXT:    ;;#ASMEND
; GFX8-NEXT:    v_readlane_b32 s55, v23, 16
; GFX8-NEXT:    v_readlane_b32 s54, v23, 15
; GFX8-NEXT:    v_readlane_b32 s53, v23, 14
; GFX8-NEXT:    v_readlane_b32 s52, v23, 13
; GFX8-NEXT:    v_readlane_b32 s51, v23, 12
; GFX8-NEXT:    v_readlane_b32 s50, v23, 11
; GFX8-NEXT:    v_readlane_b32 s49, v23, 10
; GFX8-NEXT:    v_readlane_b32 s48, v23, 9
; GFX8-NEXT:    v_readlane_b32 s39, v23, 8
; GFX8-NEXT:    v_readlane_b32 s38, v23, 7
; GFX8-NEXT:    v_readlane_b32 s37, v23, 6
; GFX8-NEXT:    v_readlane_b32 s36, v23, 5
; GFX8-NEXT:    v_readlane_b32 s35, v23, 4
; GFX8-NEXT:    v_readlane_b32 s34, v23, 3
; GFX8-NEXT:    v_readlane_b32 s33, v23, 2
; GFX8-NEXT:    v_readlane_b32 s31, v23, 1
; GFX8-NEXT:    v_readlane_b32 s30, v23, 0
; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT:    s_add_i32 s6, s32, 0x101100
; GFX8-NEXT:    buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT:    s_add_i32 s6, s32, 0x101100
; GFX900-NEXT:    buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
; GFX900-NEXT:    v_writelane_b32 v23, s30, 0
; GFX900-NEXT:    v_writelane_b32 v23, s31, 1
; GFX900-NEXT:    v_writelane_b32 v23, s33, 2
; GFX900-NEXT:    v_writelane_b32 v23, s34, 3
; GFX900-NEXT:    v_writelane_b32 v23, s35, 4
; GFX900-NEXT:    v_writelane_b32 v23, s36, 5
; GFX900-NEXT:    v_writelane_b32 v23, s37, 6
; GFX900-NEXT:    v_writelane_b32 v23, s38, 7
; GFX900-NEXT:    v_writelane_b32 v23, s39, 8
; GFX900-NEXT:    v_writelane_b32 v23, s48, 9
; GFX900-NEXT:    v_writelane_b32 v23, s49, 10
; GFX900-NEXT:    v_writelane_b32 v23, s50, 11
; GFX900-NEXT:    v_writelane_b32 v23, s51, 12
; GFX900-NEXT:    v_writelane_b32 v23, s52, 13
; GFX900-NEXT:    v_writelane_b32 v23, s53, 14
; GFX900-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT:    v_writelane_b32 v23, s54, 15
; GFX900-NEXT:    v_add_u32_e32 v0, 64, v0
; GFX900-NEXT:    s_and_b64 s[4:5], 0, exec
; GFX900-NEXT:    v_writelane_b32 v23, s55, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use alloca0 v0
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s32
; GFX900-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT:    ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX900-NEXT:    v_add_u32_e32 v0, 0x4040, v0
; GFX900-NEXT:    v_readfirstlane_b32 s54, v0
; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s32
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_readlane_b32 s55, v23, 16
; GFX900-NEXT:    v_readlane_b32 s54, v23, 15
; GFX900-NEXT:    v_readlane_b32 s53, v23, 14
; GFX900-NEXT:    v_readlane_b32 s52, v23, 13
; GFX900-NEXT:    v_readlane_b32 s51, v23, 12
; GFX900-NEXT:    v_readlane_b32 s50, v23, 11
; GFX900-NEXT:    v_readlane_b32 s49, v23, 10
; GFX900-NEXT:    v_readlane_b32 s48, v23, 9
; GFX900-NEXT:    v_readlane_b32 s39, v23, 8
; GFX900-NEXT:    v_readlane_b32 s38, v23, 7
; GFX900-NEXT:    v_readlane_b32 s37, v23, 6
; GFX900-NEXT:    v_readlane_b32 s36, v23, 5
; GFX900-NEXT:    v_readlane_b32 s35, v23, 4
; GFX900-NEXT:    v_readlane_b32 s34, v23, 3
; GFX900-NEXT:    v_readlane_b32 s33, v23, 2
; GFX900-NEXT:    v_readlane_b32 s31, v23, 1
; GFX900-NEXT:    v_readlane_b32 s30, v23, 0
; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
; GFX900-NEXT:    s_add_i32 s6, s32, 0x101100
; GFX900-NEXT:    buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT:    s_add_i32 s2, s32, 0x4044
; GFX942-NEXT:    scratch_store_dword off, v23, s2 ; 4-byte Folded Spill
; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
; GFX942-NEXT:    v_writelane_b32 v23, s30, 0
; GFX942-NEXT:    v_writelane_b32 v23, s31, 1
; GFX942-NEXT:    v_writelane_b32 v23, s33, 2
; GFX942-NEXT:    v_writelane_b32 v23, s34, 3
; GFX942-NEXT:    v_writelane_b32 v23, s35, 4
; GFX942-NEXT:    v_writelane_b32 v23, s36, 5
; GFX942-NEXT:    v_writelane_b32 v23, s37, 6
; GFX942-NEXT:    v_writelane_b32 v23, s38, 7
; GFX942-NEXT:    v_writelane_b32 v23, s39, 8
; GFX942-NEXT:    v_writelane_b32 v23, s48, 9
; GFX942-NEXT:    v_writelane_b32 v23, s49, 10
; GFX942-NEXT:    v_writelane_b32 v23, s50, 11
; GFX942-NEXT:    v_writelane_b32 v23, s51, 12
; GFX942-NEXT:    v_writelane_b32 v23, s52, 13
; GFX942-NEXT:    v_writelane_b32 v23, s53, 14
; GFX942-NEXT:    s_add_i32 s0, s32, 64
; GFX942-NEXT:    v_writelane_b32 v23, s54, 15
; GFX942-NEXT:    v_mov_b32_e32 v0, s0
; GFX942-NEXT:    s_and_b64 s[60:61], 0, exec
; GFX942-NEXT:    v_writelane_b32 v23, s55, 16
; GFX942-NEXT:    ;;#ASMSTART
; GFX942-NEXT:    ; use alloca0 v0
; GFX942-NEXT:    ;;#ASMEND
; GFX942-NEXT:    ;;#ASMSTART
; GFX942-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX942-NEXT:    ;;#ASMEND
; GFX942-NEXT:    s_addc_u32 s59, s32, 0x4040
; GFX942-NEXT:    ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX942-NEXT:    s_bitcmp1_b32 s59, 0
; GFX942-NEXT:    s_bitset0_b32 s59, 0
; GFX942-NEXT:    s_mov_b32 s54, s59
; GFX942-NEXT:    ;;#ASMSTART
; GFX942-NEXT:    ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX942-NEXT:    ;;#ASMEND
; GFX942-NEXT:    v_readlane_b32 s55, v23, 16
; GFX942-NEXT:    v_readlane_b32 s54, v23, 15
; GFX942-NEXT:    v_readlane_b32 s53, v23, 14
; GFX942-NEXT:    v_readlane_b32 s52, v23, 13
; GFX942-NEXT:    v_readlane_b32 s51, v23, 12
; GFX942-NEXT:    v_readlane_b32 s50, v23, 11
; GFX942-NEXT:    v_readlane_b32 s49, v23, 10
; GFX942-NEXT:    v_readlane_b32 s48, v23, 9
; GFX942-NEXT:    v_readlane_b32 s39, v23, 8
; GFX942-NEXT:    v_readlane_b32 s38, v23, 7
; GFX942-NEXT:    v_readlane_b32 s37, v23, 6
; GFX942-NEXT:    v_readlane_b32 s36, v23, 5
; GFX942-NEXT:    v_readlane_b32 s35, v23, 4
; GFX942-NEXT:    v_readlane_b32 s34, v23, 3
; GFX942-NEXT:    v_readlane_b32 s33, v23, 2
; GFX942-NEXT:    v_readlane_b32 s31, v23, 1
; GFX942-NEXT:    v_readlane_b32 s30, v23, 0
; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
; GFX942-NEXT:    s_add_i32 s2, s32, 0x4044
; GFX942-NEXT:    scratch_load_dword v23, off, s2 ; 4-byte Folded Reload
; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
; GFX942-NEXT:    s_waitcnt vmcnt(0)
; GFX942-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
; GFX10_1:       ; %bb.0:
; GFX10_1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80880
; GFX10_1-NEXT:    buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
; GFX10_1-NEXT:    v_writelane_b32 v23, s30, 0
; GFX10_1-NEXT:    v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT:    s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT:    v_writelane_b32 v23, s31, 1
; GFX10_1-NEXT:    v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT:    ;;#ASMSTART
; GFX10_1-NEXT:    ; use alloca0 v0
; GFX10_1-NEXT:    ;;#ASMEND
; GFX10_1-NEXT:    v_writelane_b32 v23, s33, 2
; GFX10_1-NEXT:    v_writelane_b32 v23, s34, 3
; GFX10_1-NEXT:    v_writelane_b32 v23, s35, 4
; GFX10_1-NEXT:    v_writelane_b32 v23, s36, 5
; GFX10_1-NEXT:    v_writelane_b32 v23, s37, 6
; GFX10_1-NEXT:    v_writelane_b32 v23, s38, 7
; GFX10_1-NEXT:    v_writelane_b32 v23, s39, 8
; GFX10_1-NEXT:    v_writelane_b32 v23, s48, 9
; GFX10_1-NEXT:    v_writelane_b32 v23, s49, 10
; GFX10_1-NEXT:    v_writelane_b32 v23, s50, 11
; GFX10_1-NEXT:    v_writelane_b32 v23, s51, 12
; GFX10_1-NEXT:    v_writelane_b32 v23, s52, 13
; GFX10_1-NEXT:    v_writelane_b32 v23, s53, 14
; GFX10_1-NEXT:    v_writelane_b32 v23, s54, 15
; GFX10_1-NEXT:    v_writelane_b32 v23, s55, 16
; GFX10_1-NEXT:    ;;#ASMSTART
; GFX10_1-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_1-NEXT:    ;;#ASMEND
; GFX10_1-NEXT:    v_lshrrev_b32_e64 v24, 5, s32
; GFX10_1-NEXT:    ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_1-NEXT:    v_add_nc_u32_e32 v24, 0x4040, v24
; GFX10_1-NEXT:    v_readfirstlane_b32 s54, v24
; GFX10_1-NEXT:    ;;#ASMSTART
; GFX10_1-NEXT:    ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX10_1-NEXT:    ;;#ASMEND
; GFX10_1-NEXT:    v_readlane_b32 s55, v23, 16
; GFX10_1-NEXT:    v_readlane_b32 s54, v23, 15
; GFX10_1-NEXT:    v_readlane_b32 s53, v23, 14
; GFX10_1-NEXT:    v_readlane_b32 s52, v23, 13
; GFX10_1-NEXT:    v_readlane_b32 s51, v23, 12
; GFX10_1-NEXT:    v_readlane_b32 s50, v23, 11
; GFX10_1-NEXT:    v_readlane_b32 s49, v23, 10
; GFX10_1-NEXT:    v_readlane_b32 s48, v23, 9
; GFX10_1-NEXT:    v_readlane_b32 s39, v23, 8
; GFX10_1-NEXT:    v_readlane_b32 s38, v23, 7
; GFX10_1-NEXT:    v_readlane_b32 s37, v23, 6
; GFX10_1-NEXT:    v_readlane_b32 s36, v23, 5
; GFX10_1-NEXT:    v_readlane_b32 s35, v23, 4
; GFX10_1-NEXT:    v_readlane_b32 s34, v23, 3
; GFX10_1-NEXT:    v_readlane_b32 s33, v23, 2
; GFX10_1-NEXT:    v_readlane_b32 s31, v23, 1
; GFX10_1-NEXT:    v_readlane_b32 s30, v23, 0
; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80880
; GFX10_1-NEXT:    buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
; GFX10_1-NEXT:    s_waitcnt vmcnt(0)
; GFX10_1-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
; GFX10_3:       ; %bb.0:
; GFX10_3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80880
; GFX10_3-NEXT:    buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
; GFX10_3-NEXT:    v_writelane_b32 v23, s30, 0
; GFX10_3-NEXT:    v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT:    s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT:    v_writelane_b32 v23, s31, 1
; GFX10_3-NEXT:    v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT:    ;;#ASMSTART
; GFX10_3-NEXT:    ; use alloca0 v0
; GFX10_3-NEXT:    ;;#ASMEND
; GFX10_3-NEXT:    v_writelane_b32 v23, s33, 2
; GFX10_3-NEXT:    v_writelane_b32 v23, s34, 3
; GFX10_3-NEXT:    v_writelane_b32 v23, s35, 4
; GFX10_3-NEXT:    v_writelane_b32 v23, s36, 5
; GFX10_3-NEXT:    v_writelane_b32 v23, s37, 6
; GFX10_3-NEXT:    v_writelane_b32 v23, s38, 7
; GFX10_3-NEXT:    v_writelane_b32 v23, s39, 8
; GFX10_3-NEXT:    v_writelane_b32 v23, s48, 9
; GFX10_3-NEXT:    v_writelane_b32 v23, s49, 10
; GFX10_3-NEXT:    v_writelane_b32 v23, s50, 11
; GFX10_3-NEXT:    v_writelane_b32 v23, s51, 12
; GFX10_3-NEXT:    v_writelane_b32 v23, s52, 13
; GFX10_3-NEXT:    v_writelane_b32 v23, s53, 14
; GFX10_3-NEXT:    v_writelane_b32 v23, s54, 15
; GFX10_3-NEXT:    v_writelane_b32 v23, s55, 16
; GFX10_3-NEXT:    ;;#ASMSTART
; GFX10_3-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_3-NEXT:    ;;#ASMEND
; GFX10_3-NEXT:    v_lshrrev_b32_e64 v24, 5, s32
; GFX10_3-NEXT:    ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX10_3-NEXT:    v_add_nc_u32_e32 v24, 0x4040, v24
; GFX10_3-NEXT:    v_readfirstlane_b32 s54, v24
; GFX10_3-NEXT:    ;;#ASMSTART
; GFX10_3-NEXT:    ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX10_3-NEXT:    ;;#ASMEND
; GFX10_3-NEXT:    v_readlane_b32 s55, v23, 16
; GFX10_3-NEXT:    v_readlane_b32 s54, v23, 15
; GFX10_3-NEXT:    v_readlane_b32 s53, v23, 14
; GFX10_3-NEXT:    v_readlane_b32 s52, v23, 13
; GFX10_3-NEXT:    v_readlane_b32 s51, v23, 12
; GFX10_3-NEXT:    v_readlane_b32 s50, v23, 11
; GFX10_3-NEXT:    v_readlane_b32 s49, v23, 10
; GFX10_3-NEXT:    v_readlane_b32 s48, v23, 9
; GFX10_3-NEXT:    v_readlane_b32 s39, v23, 8
; GFX10_3-NEXT:    v_readlane_b32 s38, v23, 7
; GFX10_3-NEXT:    v_readlane_b32 s37, v23, 6
; GFX10_3-NEXT:    v_readlane_b32 s36, v23, 5
; GFX10_3-NEXT:    v_readlane_b32 s35, v23, 4
; GFX10_3-NEXT:    v_readlane_b32 s34, v23, 3
; GFX10_3-NEXT:    v_readlane_b32 s33, v23, 2
; GFX10_3-NEXT:    v_readlane_b32 s31, v23, 1
; GFX10_3-NEXT:    v_readlane_b32 s30, v23, 0
; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80880
; GFX10_3-NEXT:    buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
; GFX10_3-NEXT:    s_waitcnt vmcnt(0)
; GFX10_3-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    s_add_i32 s1, s32, 0x4044
; GFX11-NEXT:    scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v23, s30, 0
; GFX11-NEXT:    s_add_i32 s0, s32, 64
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_and_b32 s0, 0, exec_lo
; GFX11-NEXT:    v_writelane_b32 v23, s31, 1
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use alloca0 v0
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v23, s33, 2
; GFX11-NEXT:    v_writelane_b32 v23, s34, 3
; GFX11-NEXT:    v_writelane_b32 v23, s35, 4
; GFX11-NEXT:    v_writelane_b32 v23, s36, 5
; GFX11-NEXT:    v_writelane_b32 v23, s37, 6
; GFX11-NEXT:    v_writelane_b32 v23, s38, 7
; GFX11-NEXT:    v_writelane_b32 v23, s39, 8
; GFX11-NEXT:    v_writelane_b32 v23, s48, 9
; GFX11-NEXT:    v_writelane_b32 v23, s49, 10
; GFX11-NEXT:    v_writelane_b32 v23, s50, 11
; GFX11-NEXT:    v_writelane_b32 v23, s51, 12
; GFX11-NEXT:    v_writelane_b32 v23, s52, 13
; GFX11-NEXT:    v_writelane_b32 v23, s53, 14
; GFX11-NEXT:    v_writelane_b32 v23, s54, 15
; GFX11-NEXT:    v_writelane_b32 v23, s55, 16
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_addc_u32 s59, s32, 0x4040
; GFX11-NEXT:    ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_bitcmp1_b32 s59, 0
; GFX11-NEXT:    s_bitset0_b32 s59, 0
; GFX11-NEXT:    s_mov_b32 s54, s59
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_readlane_b32 s55, v23, 16
; GFX11-NEXT:    v_readlane_b32 s54, v23, 15
; GFX11-NEXT:    v_readlane_b32 s53, v23, 14
; GFX11-NEXT:    v_readlane_b32 s52, v23, 13
; GFX11-NEXT:    v_readlane_b32 s51, v23, 12
; GFX11-NEXT:    v_readlane_b32 s50, v23, 11
; GFX11-NEXT:    v_readlane_b32 s49, v23, 10
; GFX11-NEXT:    v_readlane_b32 s48, v23, 9
; GFX11-NEXT:    v_readlane_b32 s39, v23, 8
; GFX11-NEXT:    v_readlane_b32 s38, v23, 7
; GFX11-NEXT:    v_readlane_b32 s37, v23, 6
; GFX11-NEXT:    v_readlane_b32 s36, v23, 5
; GFX11-NEXT:    v_readlane_b32 s35, v23, 4
; GFX11-NEXT:    v_readlane_b32 s34, v23, 3
; GFX11-NEXT:    v_readlane_b32 s33, v23, 2
; GFX11-NEXT:    v_readlane_b32 s31, v23, 1
; GFX11-NEXT:    v_readlane_b32 s30, v23, 0
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    s_add_i32 s1, s32, 0x4044
; GFX11-NEXT:    scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX12-NEXT:    scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_mov_b32 exec_lo, s0
; GFX12-NEXT:    v_writelane_b32 v23, s30, 0
; GFX12-NEXT:    v_mov_b32_e32 v0, s32
; GFX12-NEXT:    s_and_b32 s0, 0, exec_lo
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use alloca0 v0
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    v_writelane_b32 v23, s31, 1
; GFX12-NEXT:    v_writelane_b32 v23, s33, 2
; GFX12-NEXT:    v_writelane_b32 v23, s34, 3
; GFX12-NEXT:    v_writelane_b32 v23, s35, 4
; GFX12-NEXT:    v_writelane_b32 v23, s36, 5
; GFX12-NEXT:    v_writelane_b32 v23, s37, 6
; GFX12-NEXT:    v_writelane_b32 v23, s38, 7
; GFX12-NEXT:    v_writelane_b32 v23, s39, 8
; GFX12-NEXT:    v_writelane_b32 v23, s48, 9
; GFX12-NEXT:    v_writelane_b32 v23, s49, 10
; GFX12-NEXT:    v_writelane_b32 v23, s50, 11
; GFX12-NEXT:    v_writelane_b32 v23, s51, 12
; GFX12-NEXT:    v_writelane_b32 v23, s52, 13
; GFX12-NEXT:    v_writelane_b32 v23, s53, 14
; GFX12-NEXT:    v_writelane_b32 v23, s54, 15
; GFX12-NEXT:    v_writelane_b32 v23, s55, 16
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    s_add_co_ci_u32 s59, s32, 0x4000
; GFX12-NEXT:    ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_bitcmp1_b32 s59, 0
; GFX12-NEXT:    s_bitset0_b32 s59, 0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_mov_b32 s54, s59
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    v_readlane_b32 s55, v23, 16
; GFX12-NEXT:    v_readlane_b32 s54, v23, 15
; GFX12-NEXT:    v_readlane_b32 s53, v23, 14
; GFX12-NEXT:    v_readlane_b32 s52, v23, 13
; GFX12-NEXT:    v_readlane_b32 s51, v23, 12
; GFX12-NEXT:    v_readlane_b32 s50, v23, 11
; GFX12-NEXT:    v_readlane_b32 s49, v23, 10
; GFX12-NEXT:    v_readlane_b32 s48, v23, 9
; GFX12-NEXT:    v_readlane_b32 s39, v23, 8
; GFX12-NEXT:    v_readlane_b32 s38, v23, 7
; GFX12-NEXT:    v_readlane_b32 s37, v23, 6
; GFX12-NEXT:    v_readlane_b32 s36, v23, 5
; GFX12-NEXT:    v_readlane_b32 s35, v23, 4
; GFX12-NEXT:    v_readlane_b32 s34, v23, 3
; GFX12-NEXT:    v_readlane_b32 s33, v23, 2
; GFX12-NEXT:    v_readlane_b32 s31, v23, 1
; GFX12-NEXT:    v_readlane_b32 s30, v23, 0
; GFX12-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX12-NEXT:    scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_mov_b32 exec_lo, s0
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  ; Two private (addrspace 5) stack objects: a large, 64-byte-aligned array
  ; whose address is only consumed in a VGPR, and the single dword whose
  ; address is needed in an SGPR by the final asm.
  %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
  %alloca1 = alloca i32, align 4, addrspace(5)
  call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)

  ; Force no SGPRs to be available for the carry-out of the vector add.
  %asm = call %asm.output asm sideeffect "; def $0, $1, $2, $3, $4, $5, $6, $7, $8", "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:22]},={vcc}"()

  ; Split the aggregate result back into one value per defined register
  ; range so each can be fed to the matching input constraint below.
  %s0 = extractvalue %asm.output %asm, 0
  %s1 = extractvalue %asm.output %asm, 1
  %s2 = extractvalue %asm.output %asm, 2
  %s3 = extractvalue %asm.output %asm, 3
  %s4 = extractvalue %asm.output %asm, 4
  %s5 = extractvalue %asm.output %asm, 5

  %v0 = extractvalue %asm.output %asm, 6
  %v1 = extractvalue %asm.output %asm, 7

  %vcc = extractvalue %asm.output %asm, 8

  ; scc is unavailable since it is live into the asm below, which takes it
  ; as its last ({scc}) operand. The address of %alloca1 is the {s54} operand.
  call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10",
    "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s54},{scc}"(
    <16 x i32> %s0,
    <16 x i32> %s1,
    <16 x i32> %s2,
    <8 x i32> %s3,
    <2 x i32> %s4,
    i32 %s5,
    <16 x i32> %v0,
    <7 x i32> %v1,
    i64 %vcc,
    ptr addrspace(5) %alloca1,
    i32 0) ; use of scc
  ret void
}

; FIXME: This would have tested the FI at offset 0, but other objects get
; assigned there. Instead, this shows a non-0 offset that is still an inline
; immediate and can fold directly into the address computation.
define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset() #1 { ; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX7-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: v_writelane_b32 v21, s30, 0 ; GFX7-NEXT: v_writelane_b32 v21, s31, 1 ; GFX7-NEXT: v_writelane_b32 v21, s33, 2 ; GFX7-NEXT: v_writelane_b32 v21, s34, 3 ; GFX7-NEXT: v_writelane_b32 v21, s35, 4 ; GFX7-NEXT: v_writelane_b32 v21, s36, 5 ; GFX7-NEXT: v_writelane_b32 v21, s37, 6 ; GFX7-NEXT: v_writelane_b32 v21, s38, 7 ; GFX7-NEXT: v_writelane_b32 v21, s39, 8 ; GFX7-NEXT: v_writelane_b32 v21, s48, 9 ; GFX7-NEXT: v_writelane_b32 v21, s49, 10 ; GFX7-NEXT: v_writelane_b32 v21, s50, 11 ; GFX7-NEXT: v_writelane_b32 v21, s51, 12 ; GFX7-NEXT: v_writelane_b32 v21, s52, 13 ; GFX7-NEXT: v_writelane_b32 v21, s53, 14 ; GFX7-NEXT: v_writelane_b32 v21, s54, 15 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec ; GFX7-NEXT: v_writelane_b32 v21, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: v_mad_u32_u24 v22, 16, 64, s32 ; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22 ; GFX7-NEXT: v_readfirstlane_b32 s54, v22 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: v_readlane_b32 s55, v21, 16 ; GFX7-NEXT: v_readlane_b32 s54, v21, 15 ; GFX7-NEXT: v_readlane_b32 s53, v21, 14 ; GFX7-NEXT: v_readlane_b32 s52, v21, 13 ; GFX7-NEXT: v_readlane_b32 s51, v21, 12 ; GFX7-NEXT: v_readlane_b32 s50, v21, 11 ; GFX7-NEXT: v_readlane_b32 s49, v21, 10 ; GFX7-NEXT: v_readlane_b32 s48, v21, 9 ; GFX7-NEXT: v_readlane_b32 s39, v21, 8 ; 
GFX7-NEXT: v_readlane_b32 s38, v21, 7 ; GFX7-NEXT: v_readlane_b32 s37, v21, 6 ; GFX7-NEXT: v_readlane_b32 s36, v21, 5 ; GFX7-NEXT: v_readlane_b32 s35, v21, 4 ; GFX7-NEXT: v_readlane_b32 s34, v21, 3 ; GFX7-NEXT: v_readlane_b32 s33, v21, 2 ; GFX7-NEXT: v_readlane_b32 s31, v21, 1 ; GFX7-NEXT: v_readlane_b32 s30, v21, 0 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX7-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX8-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_writelane_b32 v21, s30, 0 ; GFX8-NEXT: v_writelane_b32 v21, s31, 1 ; GFX8-NEXT: v_writelane_b32 v21, s33, 2 ; GFX8-NEXT: v_writelane_b32 v21, s34, 3 ; GFX8-NEXT: v_writelane_b32 v21, s35, 4 ; GFX8-NEXT: v_writelane_b32 v21, s36, 5 ; GFX8-NEXT: v_writelane_b32 v21, s37, 6 ; GFX8-NEXT: v_writelane_b32 v21, s38, 7 ; GFX8-NEXT: v_writelane_b32 v21, s39, 8 ; GFX8-NEXT: v_writelane_b32 v21, s48, 9 ; GFX8-NEXT: v_writelane_b32 v21, s49, 10 ; GFX8-NEXT: v_writelane_b32 v21, s50, 11 ; GFX8-NEXT: v_writelane_b32 v21, s51, 12 ; GFX8-NEXT: v_writelane_b32 v21, s52, 13 ; GFX8-NEXT: v_writelane_b32 v21, s53, 14 ; GFX8-NEXT: v_writelane_b32 v21, s54, 15 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: v_writelane_b32 v21, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_mad_u32_u24 v22, 16, 64, s32 ; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed 
$sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22 ; GFX8-NEXT: v_readfirstlane_b32 s54, v22 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_readlane_b32 s55, v21, 16 ; GFX8-NEXT: v_readlane_b32 s54, v21, 15 ; GFX8-NEXT: v_readlane_b32 s53, v21, 14 ; GFX8-NEXT: v_readlane_b32 s52, v21, 13 ; GFX8-NEXT: v_readlane_b32 s51, v21, 12 ; GFX8-NEXT: v_readlane_b32 s50, v21, 11 ; GFX8-NEXT: v_readlane_b32 s49, v21, 10 ; GFX8-NEXT: v_readlane_b32 s48, v21, 9 ; GFX8-NEXT: v_readlane_b32 s39, v21, 8 ; GFX8-NEXT: v_readlane_b32 s38, v21, 7 ; GFX8-NEXT: v_readlane_b32 s37, v21, 6 ; GFX8-NEXT: v_readlane_b32 s36, v21, 5 ; GFX8-NEXT: v_readlane_b32 s35, v21, 4 ; GFX8-NEXT: v_readlane_b32 s34, v21, 3 ; GFX8-NEXT: v_readlane_b32 s33, v21, 2 ; GFX8-NEXT: v_readlane_b32 s31, v21, 1 ; GFX8-NEXT: v_readlane_b32 s30, v21, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX8-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX900-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v21, s30, 0 ; GFX900-NEXT: v_writelane_b32 v21, s31, 1 ; GFX900-NEXT: v_writelane_b32 v21, s33, 2 ; GFX900-NEXT: v_writelane_b32 v21, s34, 3 ; GFX900-NEXT: v_writelane_b32 v21, s35, 4 ; GFX900-NEXT: v_writelane_b32 v21, s36, 5 ; GFX900-NEXT: v_writelane_b32 v21, s37, 6 ; GFX900-NEXT: v_writelane_b32 v21, s38, 7 ; GFX900-NEXT: 
v_writelane_b32 v21, s39, 8 ; GFX900-NEXT: v_writelane_b32 v21, s48, 9 ; GFX900-NEXT: v_writelane_b32 v21, s49, 10 ; GFX900-NEXT: v_writelane_b32 v21, s50, 11 ; GFX900-NEXT: v_writelane_b32 v21, s51, 12 ; GFX900-NEXT: v_writelane_b32 v21, s52, 13 ; GFX900-NEXT: v_writelane_b32 v21, s53, 14 ; GFX900-NEXT: v_writelane_b32 v21, s54, 15 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: v_writelane_b32 v21, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v22, 6, s32 ; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX900-NEXT: v_add_u32_e32 v22, 16, v22 ; GFX900-NEXT: v_readfirstlane_b32 s54, v22 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s55, v21, 16 ; GFX900-NEXT: v_readlane_b32 s54, v21, 15 ; GFX900-NEXT: v_readlane_b32 s53, v21, 14 ; GFX900-NEXT: v_readlane_b32 s52, v21, 13 ; GFX900-NEXT: v_readlane_b32 s51, v21, 12 ; GFX900-NEXT: v_readlane_b32 s50, v21, 11 ; GFX900-NEXT: v_readlane_b32 s49, v21, 10 ; GFX900-NEXT: v_readlane_b32 s48, v21, 9 ; GFX900-NEXT: v_readlane_b32 s39, v21, 8 ; GFX900-NEXT: v_readlane_b32 s38, v21, 7 ; GFX900-NEXT: v_readlane_b32 s37, v21, 6 ; GFX900-NEXT: v_readlane_b32 s36, v21, 5 ; GFX900-NEXT: v_readlane_b32 s35, v21, 4 ; GFX900-NEXT: v_readlane_b32 s34, v21, 3 ; GFX900-NEXT: v_readlane_b32 s33, v21, 2 ; GFX900-NEXT: v_readlane_b32 s31, v21, 1 ; GFX900-NEXT: v_readlane_b32 s30, v21, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX900-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: 
s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 ; GFX942-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v21, s30, 0 ; GFX942-NEXT: v_writelane_b32 v21, s31, 1 ; GFX942-NEXT: v_writelane_b32 v21, s33, 2 ; GFX942-NEXT: v_writelane_b32 v21, s34, 3 ; GFX942-NEXT: v_writelane_b32 v21, s35, 4 ; GFX942-NEXT: v_writelane_b32 v21, s36, 5 ; GFX942-NEXT: v_writelane_b32 v21, s37, 6 ; GFX942-NEXT: v_writelane_b32 v21, s38, 7 ; GFX942-NEXT: v_writelane_b32 v21, s39, 8 ; GFX942-NEXT: v_writelane_b32 v21, s48, 9 ; GFX942-NEXT: v_writelane_b32 v21, s49, 10 ; GFX942-NEXT: v_writelane_b32 v21, s50, 11 ; GFX942-NEXT: v_writelane_b32 v21, s51, 12 ; GFX942-NEXT: v_writelane_b32 v21, s52, 13 ; GFX942-NEXT: v_writelane_b32 v21, s53, 14 ; GFX942-NEXT: v_writelane_b32 v21, s54, 15 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec ; GFX942-NEXT: v_writelane_b32 v21, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_addc_u32 s59, s32, 16 ; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX942-NEXT: s_bitcmp1_b32 s59, 0 ; GFX942-NEXT: s_bitset0_b32 s59, 0 ; GFX942-NEXT: s_mov_b32 s54, s59 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v21, 16 ; GFX942-NEXT: v_readlane_b32 s54, v21, 15 ; GFX942-NEXT: v_readlane_b32 s53, v21, 14 ; GFX942-NEXT: v_readlane_b32 s52, v21, 13 ; GFX942-NEXT: v_readlane_b32 s51, v21, 12 ; 
GFX942-NEXT: v_readlane_b32 s50, v21, 11 ; GFX942-NEXT: v_readlane_b32 s49, v21, 10 ; GFX942-NEXT: v_readlane_b32 s48, v21, 9 ; GFX942-NEXT: v_readlane_b32 s39, v21, 8 ; GFX942-NEXT: v_readlane_b32 s38, v21, 7 ; GFX942-NEXT: v_readlane_b32 s37, v21, 6 ; GFX942-NEXT: v_readlane_b32 s36, v21, 5 ; GFX942-NEXT: v_readlane_b32 s35, v21, 4 ; GFX942-NEXT: v_readlane_b32 s34, v21, 3 ; GFX942-NEXT: v_readlane_b32 s33, v21, 2 ; GFX942-NEXT: v_readlane_b32 s31, v21, 1 ; GFX942-NEXT: v_readlane_b32 s30, v21, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 ; GFX942-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_1-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_writelane_b32 v21, s30, 0 ; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_1-NEXT: v_writelane_b32 v21, s31, 1 ; GFX10_1-NEXT: v_writelane_b32 v21, s33, 2 ; GFX10_1-NEXT: v_writelane_b32 v21, s34, 3 ; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4 ; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5 ; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6 ; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7 ; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8 ; GFX10_1-NEXT: v_writelane_b32 v21, s48, 9 ; GFX10_1-NEXT: v_writelane_b32 v21, s49, 10 ; GFX10_1-NEXT: v_writelane_b32 v21, s50, 11 ; GFX10_1-NEXT: v_writelane_b32 v21, s51, 12 ; GFX10_1-NEXT: v_writelane_b32 v21, s52, 13 ; GFX10_1-NEXT: v_writelane_b32 v21, s53, 14 ; GFX10_1-NEXT: v_writelane_b32 v21, s54, 15 ; GFX10_1-NEXT: v_writelane_b32 v21, s55, 16 ; GFX10_1-NEXT: 
;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32 ; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22 ; GFX10_1-NEXT: v_readfirstlane_b32 s54, v22 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16 ; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15 ; GFX10_1-NEXT: v_readlane_b32 s53, v21, 14 ; GFX10_1-NEXT: v_readlane_b32 s52, v21, 13 ; GFX10_1-NEXT: v_readlane_b32 s51, v21, 12 ; GFX10_1-NEXT: v_readlane_b32 s50, v21, 11 ; GFX10_1-NEXT: v_readlane_b32 s49, v21, 10 ; GFX10_1-NEXT: v_readlane_b32 s48, v21, 9 ; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8 ; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7 ; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6 ; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5 ; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4 ; GFX10_1-NEXT: v_readlane_b32 s34, v21, 3 ; GFX10_1-NEXT: v_readlane_b32 s33, v21, 2 ; GFX10_1-NEXT: v_readlane_b32 s31, v21, 1 ; GFX10_1-NEXT: v_readlane_b32 s30, v21, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_1-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_3-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill ; 
GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_writelane_b32 v21, s30, 0 ; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_3-NEXT: v_writelane_b32 v21, s31, 1 ; GFX10_3-NEXT: v_writelane_b32 v21, s33, 2 ; GFX10_3-NEXT: v_writelane_b32 v21, s34, 3 ; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4 ; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5 ; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6 ; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7 ; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8 ; GFX10_3-NEXT: v_writelane_b32 v21, s48, 9 ; GFX10_3-NEXT: v_writelane_b32 v21, s49, 10 ; GFX10_3-NEXT: v_writelane_b32 v21, s50, 11 ; GFX10_3-NEXT: v_writelane_b32 v21, s51, 12 ; GFX10_3-NEXT: v_writelane_b32 v21, s52, 13 ; GFX10_3-NEXT: v_writelane_b32 v21, s53, 14 ; GFX10_3-NEXT: v_writelane_b32 v21, s54, 15 ; GFX10_3-NEXT: v_writelane_b32 v21, s55, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32 ; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22 ; GFX10_3-NEXT: v_readfirstlane_b32 s54, v22 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_readlane_b32 s55, v21, 16 ; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15 ; GFX10_3-NEXT: v_readlane_b32 s53, v21, 14 ; GFX10_3-NEXT: v_readlane_b32 s52, v21, 13 ; GFX10_3-NEXT: v_readlane_b32 s51, v21, 12 ; GFX10_3-NEXT: v_readlane_b32 s50, v21, 11 ; GFX10_3-NEXT: v_readlane_b32 s49, v21, 10 ; GFX10_3-NEXT: v_readlane_b32 s48, v21, 9 ; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8 ; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7 ; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6 ; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5 ; GFX10_3-NEXT: 
v_readlane_b32 s35, v21, 4 ; GFX10_3-NEXT: v_readlane_b32 s34, v21, 3 ; GFX10_3-NEXT: v_readlane_b32 s33, v21, 2 ; GFX10_3-NEXT: v_readlane_b32 s31, v21, 1 ; GFX10_3-NEXT: v_readlane_b32 s30, v21, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_3-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 ; GFX11-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v21, s30, 0 ; GFX11-NEXT: s_and_b32 s59, 0, exec_lo ; GFX11-NEXT: v_writelane_b32 v21, s31, 1 ; GFX11-NEXT: v_writelane_b32 v21, s33, 2 ; GFX11-NEXT: v_writelane_b32 v21, s34, 3 ; GFX11-NEXT: v_writelane_b32 v21, s35, 4 ; GFX11-NEXT: v_writelane_b32 v21, s36, 5 ; GFX11-NEXT: v_writelane_b32 v21, s37, 6 ; GFX11-NEXT: v_writelane_b32 v21, s38, 7 ; GFX11-NEXT: v_writelane_b32 v21, s39, 8 ; GFX11-NEXT: v_writelane_b32 v21, s48, 9 ; GFX11-NEXT: v_writelane_b32 v21, s49, 10 ; GFX11-NEXT: v_writelane_b32 v21, s50, 11 ; GFX11-NEXT: v_writelane_b32 v21, s51, 12 ; GFX11-NEXT: v_writelane_b32 v21, s52, 13 ; GFX11-NEXT: v_writelane_b32 v21, s53, 14 ; GFX11-NEXT: v_writelane_b32 v21, s54, 15 ; GFX11-NEXT: v_writelane_b32 v21, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_addc_u32 s59, s32, 16 ; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | 
instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_bitcmp1_b32 s59, 0 ; GFX11-NEXT: s_bitset0_b32 s59, 0 ; GFX11-NEXT: s_mov_b32 s54, s59 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s55, v21, 16 ; GFX11-NEXT: v_readlane_b32 s54, v21, 15 ; GFX11-NEXT: v_readlane_b32 s53, v21, 14 ; GFX11-NEXT: v_readlane_b32 s52, v21, 13 ; GFX11-NEXT: v_readlane_b32 s51, v21, 12 ; GFX11-NEXT: v_readlane_b32 s50, v21, 11 ; GFX11-NEXT: v_readlane_b32 s49, v21, 10 ; GFX11-NEXT: v_readlane_b32 s48, v21, 9 ; GFX11-NEXT: v_readlane_b32 s39, v21, 8 ; GFX11-NEXT: v_readlane_b32 s38, v21, 7 ; GFX11-NEXT: v_readlane_b32 s37, v21, 6 ; GFX11-NEXT: v_readlane_b32 s36, v21, 5 ; GFX11-NEXT: v_readlane_b32 s35, v21, 4 ; GFX11-NEXT: v_readlane_b32 s34, v21, 3 ; GFX11-NEXT: v_readlane_b32 s33, v21, 2 ; GFX11-NEXT: v_readlane_b32 s31, v21, 1 ; GFX11-NEXT: v_readlane_b32 s30, v21, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 ; GFX11-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v21, s30, 0 ; GFX12-NEXT: s_and_b32 s59, 0, exec_lo ; GFX12-NEXT: v_writelane_b32 v21, s31, 1 ; GFX12-NEXT: v_writelane_b32 v21, s33, 2 ; GFX12-NEXT: v_writelane_b32 v21, s34, 3 ; GFX12-NEXT: v_writelane_b32 v21, s35, 4 ; GFX12-NEXT: v_writelane_b32 v21, s36, 5 
; GFX12-NEXT: v_writelane_b32 v21, s37, 6 ; GFX12-NEXT: v_writelane_b32 v21, s38, 7 ; GFX12-NEXT: v_writelane_b32 v21, s39, 8 ; GFX12-NEXT: v_writelane_b32 v21, s48, 9 ; GFX12-NEXT: v_writelane_b32 v21, s49, 10 ; GFX12-NEXT: v_writelane_b32 v21, s50, 11 ; GFX12-NEXT: v_writelane_b32 v21, s51, 12 ; GFX12-NEXT: v_writelane_b32 v21, s52, 13 ; GFX12-NEXT: v_writelane_b32 v21, s53, 14 ; GFX12-NEXT: v_writelane_b32 v21, s54, 15 ; GFX12-NEXT: v_writelane_b32 v21, s55, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX12-NEXT: s_mov_b32 s54, s32 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_readlane_b32 s55, v21, 16 ; GFX12-NEXT: v_readlane_b32 s54, v21, 15 ; GFX12-NEXT: v_readlane_b32 s53, v21, 14 ; GFX12-NEXT: v_readlane_b32 s52, v21, 13 ; GFX12-NEXT: v_readlane_b32 s51, v21, 12 ; GFX12-NEXT: v_readlane_b32 s50, v21, 11 ; GFX12-NEXT: v_readlane_b32 s49, v21, 10 ; GFX12-NEXT: v_readlane_b32 s48, v21, 9 ; GFX12-NEXT: v_readlane_b32 s39, v21, 8 ; GFX12-NEXT: v_readlane_b32 s38, v21, 7 ; GFX12-NEXT: v_readlane_b32 s37, v21, 6 ; GFX12-NEXT: v_readlane_b32 s36, v21, 5 ; GFX12-NEXT: v_readlane_b32 s35, v21, 4 ; GFX12-NEXT: v_readlane_b32 s34, v21, 3 ; GFX12-NEXT: v_readlane_b32 s33, v21, 2 ; GFX12-NEXT: v_readlane_b32 s31, v21, 1 ; GFX12-NEXT: v_readlane_b32 s30, v21, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x 
i32], align 16, addrspace(5) ; Force no SGPRs to be available for the carry-out of the vector add. %asm = call %asm.output2 asm sideeffect "; def $0, $1, $2, $3, $4, $5, $6, $7, $8", "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:20]},={vcc}"() %s0 = extractvalue %asm.output2 %asm, 0 %s1 = extractvalue %asm.output2 %asm, 1 %s2 = extractvalue %asm.output2 %asm, 2 %s3 = extractvalue %asm.output2 %asm, 3 %s4 = extractvalue %asm.output2 %asm, 4 %s5 = extractvalue %asm.output2 %asm, 5 %v0 = extractvalue %asm.output2 %asm, 6 %v1 = extractvalue %asm.output2 %asm, 7 %vcc = extractvalue %asm.output2 %asm, 8 ; scc is unavailable since it is live in call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10", "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s54},{scc}"( <16 x i32> %s0, <16 x i32> %s1, <16 x i32> %s2, <8 x i32> %s3, <2 x i32> %s4, i32 %s5, <16 x i32> %v0, <5 x i32> %v1, i64 %vcc, ptr addrspace(5) %alloca0, i32 0) ; use of scc ret void } ; This case isn't using SGPRs yet. ; FIXME: Should also use one more VGPR, but currently fails to allocate on gfx8. 
define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset() #0 { ; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: v_writelane_b32 v23, s28, 17 ; GFX7-NEXT: v_writelane_b32 v23, s29, 18 ; GFX7-NEXT: v_writelane_b32 v23, s30, 0 ; GFX7-NEXT: v_writelane_b32 v23, s31, 1 ; GFX7-NEXT: v_writelane_b32 v23, s33, 2 ; GFX7-NEXT: v_writelane_b32 v23, s34, 3 ; GFX7-NEXT: v_writelane_b32 v23, s35, 4 ; GFX7-NEXT: v_writelane_b32 v23, s36, 5 ; GFX7-NEXT: v_writelane_b32 v23, s37, 6 ; GFX7-NEXT: v_writelane_b32 v23, s38, 7 ; GFX7-NEXT: v_writelane_b32 v23, s39, 8 ; GFX7-NEXT: v_writelane_b32 v23, s48, 9 ; GFX7-NEXT: v_writelane_b32 v23, s49, 10 ; GFX7-NEXT: v_writelane_b32 v23, s50, 11 ; GFX7-NEXT: v_writelane_b32 v23, s51, 12 ; GFX7-NEXT: v_writelane_b32 v23, s52, 13 ; GFX7-NEXT: s_lshr_b32 s5, s32, 6 ; GFX7-NEXT: v_writelane_b32 v23, s53, 14 ; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 ; GFX7-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane ; GFX7-NEXT: v_writelane_b32 v23, s54, 15 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 ; GFX7-NEXT: v_writelane_b32 v22, s4, 0 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec ; GFX7-NEXT: v_writelane_b32 v23, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use alloca0 v0 ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: v_readlane_b32 s54, v22, 0 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], 
s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: v_readlane_b32 s55, v23, 16 ; GFX7-NEXT: v_readlane_b32 s54, v23, 15 ; GFX7-NEXT: v_readlane_b32 s53, v23, 14 ; GFX7-NEXT: v_readlane_b32 s52, v23, 13 ; GFX7-NEXT: v_readlane_b32 s51, v23, 12 ; GFX7-NEXT: v_readlane_b32 s50, v23, 11 ; GFX7-NEXT: v_readlane_b32 s49, v23, 10 ; GFX7-NEXT: v_readlane_b32 s48, v23, 9 ; GFX7-NEXT: v_readlane_b32 s39, v23, 8 ; GFX7-NEXT: v_readlane_b32 s38, v23, 7 ; GFX7-NEXT: v_readlane_b32 s37, v23, 6 ; GFX7-NEXT: v_readlane_b32 s36, v23, 5 ; GFX7-NEXT: v_readlane_b32 s35, v23, 4 ; GFX7-NEXT: v_readlane_b32 s34, v23, 3 ; GFX7-NEXT: v_readlane_b32 s33, v23, 2 ; GFX7-NEXT: v_readlane_b32 s31, v23, 1 ; GFX7-NEXT: v_readlane_b32 s30, v23, 0 ; GFX7-NEXT: v_readlane_b32 s28, v23, 17 ; GFX7-NEXT: v_readlane_b32 s29, v23, 18 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX7-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX7-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_writelane_b32 v23, s30, 0 ; GFX8-NEXT: v_writelane_b32 v23, s31, 1 ; GFX8-NEXT: v_writelane_b32 v23, s33, 2 ; GFX8-NEXT: v_writelane_b32 v23, s34, 3 ; GFX8-NEXT: v_writelane_b32 v23, s35, 4 ; GFX8-NEXT: v_writelane_b32 v23, s36, 5 ; GFX8-NEXT: v_writelane_b32 v23, s37, 6 ; GFX8-NEXT: 
v_writelane_b32 v23, s38, 7 ; GFX8-NEXT: v_writelane_b32 v23, s39, 8 ; GFX8-NEXT: v_writelane_b32 v23, s48, 9 ; GFX8-NEXT: v_writelane_b32 v23, s49, 10 ; GFX8-NEXT: v_writelane_b32 v23, s50, 11 ; GFX8-NEXT: v_writelane_b32 v23, s51, 12 ; GFX8-NEXT: v_writelane_b32 v23, s52, 13 ; GFX8-NEXT: s_lshr_b32 s5, s32, 6 ; GFX8-NEXT: v_writelane_b32 v23, s53, 14 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane ; GFX8-NEXT: v_writelane_b32 v23, s54, 15 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: v_writelane_b32 v22, s4, 0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: v_writelane_b32 v23, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX8-NEXT: v_readlane_b32 s54, v22, 0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_readlane_b32 s55, v23, 16 ; GFX8-NEXT: v_readlane_b32 s54, v23, 15 ; GFX8-NEXT: v_readlane_b32 s53, v23, 14 ; GFX8-NEXT: v_readlane_b32 s52, v23, 13 ; GFX8-NEXT: v_readlane_b32 s51, v23, 12 ; GFX8-NEXT: v_readlane_b32 s50, v23, 11 ; GFX8-NEXT: v_readlane_b32 s49, v23, 10 ; GFX8-NEXT: v_readlane_b32 s48, v23, 9 ; GFX8-NEXT: v_readlane_b32 s39, v23, 8 ; GFX8-NEXT: v_readlane_b32 s38, v23, 7 ; GFX8-NEXT: v_readlane_b32 s37, v23, 6 ; GFX8-NEXT: v_readlane_b32 s36, v23, 5 ; GFX8-NEXT: v_readlane_b32 s35, v23, 4 ; GFX8-NEXT: v_readlane_b32 s34, v23, 3 ; GFX8-NEXT: v_readlane_b32 s33, v23, 2 ; GFX8-NEXT: v_readlane_b32 s31, v23, 1 ; GFX8-NEXT: v_readlane_b32 s30, v23, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; 
GFX8-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX8-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v23, s30, 0 ; GFX900-NEXT: v_writelane_b32 v23, s31, 1 ; GFX900-NEXT: v_writelane_b32 v23, s33, 2 ; GFX900-NEXT: v_writelane_b32 v23, s34, 3 ; GFX900-NEXT: v_writelane_b32 v23, s35, 4 ; GFX900-NEXT: v_writelane_b32 v23, s36, 5 ; GFX900-NEXT: v_writelane_b32 v23, s37, 6 ; GFX900-NEXT: v_writelane_b32 v23, s38, 7 ; GFX900-NEXT: v_writelane_b32 v23, s39, 8 ; GFX900-NEXT: v_writelane_b32 v23, s48, 9 ; GFX900-NEXT: v_writelane_b32 v23, s49, 10 ; GFX900-NEXT: v_writelane_b32 v23, s50, 11 ; GFX900-NEXT: v_writelane_b32 v23, s51, 12 ; GFX900-NEXT: v_writelane_b32 v23, s52, 13 ; GFX900-NEXT: s_lshr_b32 s5, s32, 6 ; GFX900-NEXT: v_writelane_b32 v23, s53, 14 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX900-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane ; GFX900-NEXT: v_writelane_b32 v23, s54, 15 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: v_writelane_b32 v22, s4, 0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: v_writelane_b32 v23, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; 
def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX900-NEXT: v_readlane_b32 s54, v22, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s55, v23, 16 ; GFX900-NEXT: v_readlane_b32 s54, v23, 15 ; GFX900-NEXT: v_readlane_b32 s53, v23, 14 ; GFX900-NEXT: v_readlane_b32 s52, v23, 13 ; GFX900-NEXT: v_readlane_b32 s51, v23, 12 ; GFX900-NEXT: v_readlane_b32 s50, v23, 11 ; GFX900-NEXT: v_readlane_b32 s49, v23, 10 ; GFX900-NEXT: v_readlane_b32 s48, v23, 9 ; GFX900-NEXT: v_readlane_b32 s39, v23, 8 ; GFX900-NEXT: v_readlane_b32 s38, v23, 7 ; GFX900-NEXT: v_readlane_b32 s37, v23, 6 ; GFX900-NEXT: v_readlane_b32 s36, v23, 5 ; GFX900-NEXT: v_readlane_b32 s35, v23, 4 ; GFX900-NEXT: v_readlane_b32 s34, v23, 3 ; GFX900-NEXT: v_readlane_b32 s33, v23, 2 ; GFX900-NEXT: v_readlane_b32 s31, v23, 1 ; GFX900-NEXT: v_readlane_b32 s30, v23, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX900-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX900-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX942-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v22, s30, 0 ; GFX942-NEXT: 
v_writelane_b32 v22, s31, 1 ; GFX942-NEXT: v_writelane_b32 v22, s33, 2 ; GFX942-NEXT: v_writelane_b32 v22, s34, 3 ; GFX942-NEXT: v_writelane_b32 v22, s35, 4 ; GFX942-NEXT: v_writelane_b32 v22, s36, 5 ; GFX942-NEXT: v_writelane_b32 v22, s37, 6 ; GFX942-NEXT: v_writelane_b32 v22, s38, 7 ; GFX942-NEXT: v_writelane_b32 v22, s39, 8 ; GFX942-NEXT: v_writelane_b32 v22, s48, 9 ; GFX942-NEXT: v_writelane_b32 v22, s49, 10 ; GFX942-NEXT: v_writelane_b32 v22, s50, 11 ; GFX942-NEXT: v_writelane_b32 v22, s51, 12 ; GFX942-NEXT: v_writelane_b32 v22, s52, 13 ; GFX942-NEXT: v_writelane_b32 v22, s53, 14 ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_writelane_b32 v22, s54, 15 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: v_writelane_b32 v22, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_add_i32 s58, s32, 0x4240 ; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec ; GFX942-NEXT: s_mov_b32 s54, s58 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v22, 16 ; GFX942-NEXT: v_readlane_b32 s54, v22, 15 ; GFX942-NEXT: v_readlane_b32 s53, v22, 14 ; GFX942-NEXT: v_readlane_b32 s52, v22, 13 ; GFX942-NEXT: v_readlane_b32 s51, v22, 12 ; GFX942-NEXT: v_readlane_b32 s50, v22, 11 ; GFX942-NEXT: v_readlane_b32 s49, v22, 10 ; GFX942-NEXT: v_readlane_b32 s48, v22, 9 ; GFX942-NEXT: v_readlane_b32 s39, v22, 8 ; GFX942-NEXT: v_readlane_b32 s38, v22, 7 ; GFX942-NEXT: v_readlane_b32 s37, v22, 6 ; GFX942-NEXT: v_readlane_b32 s36, v22, 5 ; GFX942-NEXT: v_readlane_b32 s35, v22, 4 ; GFX942-NEXT: v_readlane_b32 
s34, v22, 3 ; GFX942-NEXT: v_readlane_b32 s33, v22, 2 ; GFX942-NEXT: v_readlane_b32 s31, v22, 1 ; GFX942-NEXT: v_readlane_b32 s30, v22, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX942-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_1-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_1-NEXT: s_add_i32 s58, s4, 0x4240 ; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_writelane_b32 v22, s33, 2 ; GFX10_1-NEXT: v_writelane_b32 v22, s34, 3 ; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4 ; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5 ; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6 ; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7 ; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8 ; GFX10_1-NEXT: v_writelane_b32 v22, s48, 9 ; GFX10_1-NEXT: v_writelane_b32 v22, s49, 10 ; GFX10_1-NEXT: v_writelane_b32 v22, s50, 11 ; GFX10_1-NEXT: v_writelane_b32 v22, s51, 12 ; GFX10_1-NEXT: v_writelane_b32 v22, s52, 13 ; GFX10_1-NEXT: v_writelane_b32 v22, s53, 14 ; GFX10_1-NEXT: v_writelane_b32 v22, s54, 15 ; GFX10_1-NEXT: v_writelane_b32 v22, s55, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; 
GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX10_1-NEXT: s_mov_b32 s54, s58 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16 ; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15 ; GFX10_1-NEXT: v_readlane_b32 s53, v22, 14 ; GFX10_1-NEXT: v_readlane_b32 s52, v22, 13 ; GFX10_1-NEXT: v_readlane_b32 s51, v22, 12 ; GFX10_1-NEXT: v_readlane_b32 s50, v22, 11 ; GFX10_1-NEXT: v_readlane_b32 s49, v22, 10 ; GFX10_1-NEXT: v_readlane_b32 s48, v22, 9 ; GFX10_1-NEXT: v_readlane_b32 s39, v22, 8 ; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7 ; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6 ; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5 ; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4 ; GFX10_1-NEXT: v_readlane_b32 s34, v22, 3 ; GFX10_1-NEXT: v_readlane_b32 s33, v22, 2 ; GFX10_1-NEXT: v_readlane_b32 s31, v22, 1 ; GFX10_1-NEXT: v_readlane_b32 s30, v22, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_1-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_3-NEXT: s_add_i32 s58, s4, 0x4240 ; GFX10_3-NEXT: 
v_writelane_b32 v22, s31, 1 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_writelane_b32 v22, s33, 2 ; GFX10_3-NEXT: v_writelane_b32 v22, s34, 3 ; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4 ; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5 ; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6 ; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7 ; GFX10_3-NEXT: v_writelane_b32 v22, s39, 8 ; GFX10_3-NEXT: v_writelane_b32 v22, s48, 9 ; GFX10_3-NEXT: v_writelane_b32 v22, s49, 10 ; GFX10_3-NEXT: v_writelane_b32 v22, s50, 11 ; GFX10_3-NEXT: v_writelane_b32 v22, s51, 12 ; GFX10_3-NEXT: v_writelane_b32 v22, s52, 13 ; GFX10_3-NEXT: v_writelane_b32 v22, s53, 14 ; GFX10_3-NEXT: v_writelane_b32 v22, s54, 15 ; GFX10_3-NEXT: v_writelane_b32 v22, s55, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX10_3-NEXT: s_mov_b32 s54, s58 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16 ; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15 ; GFX10_3-NEXT: v_readlane_b32 s53, v22, 14 ; GFX10_3-NEXT: v_readlane_b32 s52, v22, 13 ; GFX10_3-NEXT: v_readlane_b32 s51, v22, 12 ; GFX10_3-NEXT: v_readlane_b32 s50, v22, 11 ; GFX10_3-NEXT: v_readlane_b32 s49, v22, 10 ; GFX10_3-NEXT: v_readlane_b32 s48, v22, 9 ; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8 ; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7 ; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6 ; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5 ; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4 ; GFX10_3-NEXT: v_readlane_b32 s34, v22, 3 ; GFX10_3-NEXT: 
v_readlane_b32 s33, v22, 2 ; GFX10_3-NEXT: v_readlane_b32 s31, v22, 1 ; GFX10_3-NEXT: v_readlane_b32 s30, v22, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX11-NEXT: scratch_store_b32 off, v22, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v22, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: s_add_i32 s58, s32, 0x4240 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: v_writelane_b32 v22, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_writelane_b32 v22, s33, 2 ; GFX11-NEXT: v_writelane_b32 v22, s34, 3 ; GFX11-NEXT: v_writelane_b32 v22, s35, 4 ; GFX11-NEXT: v_writelane_b32 v22, s36, 5 ; GFX11-NEXT: v_writelane_b32 v22, s37, 6 ; GFX11-NEXT: v_writelane_b32 v22, s38, 7 ; GFX11-NEXT: v_writelane_b32 v22, s39, 8 ; GFX11-NEXT: v_writelane_b32 v22, s48, 9 ; GFX11-NEXT: v_writelane_b32 v22, s49, 10 ; GFX11-NEXT: v_writelane_b32 v22, s50, 11 ; GFX11-NEXT: v_writelane_b32 v22, s51, 12 ; GFX11-NEXT: v_writelane_b32 v22, s52, 13 ; GFX11-NEXT: v_writelane_b32 v22, s53, 14 ; GFX11-NEXT: v_writelane_b32 v22, s54, 15 ; GFX11-NEXT: v_writelane_b32 v22, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; 
GFX11-NEXT: s_mov_b32 s54, s58 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s55, v22, 16 ; GFX11-NEXT: v_readlane_b32 s54, v22, 15 ; GFX11-NEXT: v_readlane_b32 s53, v22, 14 ; GFX11-NEXT: v_readlane_b32 s52, v22, 13 ; GFX11-NEXT: v_readlane_b32 s51, v22, 12 ; GFX11-NEXT: v_readlane_b32 s50, v22, 11 ; GFX11-NEXT: v_readlane_b32 s49, v22, 10 ; GFX11-NEXT: v_readlane_b32 s48, v22, 9 ; GFX11-NEXT: v_readlane_b32 s39, v22, 8 ; GFX11-NEXT: v_readlane_b32 s38, v22, 7 ; GFX11-NEXT: v_readlane_b32 s37, v22, 6 ; GFX11-NEXT: v_readlane_b32 s36, v22, 5 ; GFX11-NEXT: v_readlane_b32 s35, v22, 4 ; GFX11-NEXT: v_readlane_b32 s34, v22, 3 ; GFX11-NEXT: v_readlane_b32 s33, v22, 2 ; GFX11-NEXT: v_readlane_b32 s31, v22, 1 ; GFX11-NEXT: v_readlane_b32 s30, v22, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX11-NEXT: scratch_load_b32 v22, off, s1 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_store_b32 off, v22, s32 offset:32768 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v22, s30, 0 ; GFX12-NEXT: s_add_co_i32 s58, s32, 0x4200 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: v_writelane_b32 v22, s31, 1 ; GFX12-NEXT: v_writelane_b32 v22, s33, 2 ; GFX12-NEXT: 
v_writelane_b32 v22, s34, 3 ; GFX12-NEXT: v_writelane_b32 v22, s35, 4 ; GFX12-NEXT: v_writelane_b32 v22, s36, 5 ; GFX12-NEXT: v_writelane_b32 v22, s37, 6 ; GFX12-NEXT: v_writelane_b32 v22, s38, 7 ; GFX12-NEXT: v_writelane_b32 v22, s39, 8 ; GFX12-NEXT: v_writelane_b32 v22, s48, 9 ; GFX12-NEXT: v_writelane_b32 v22, s49, 10 ; GFX12-NEXT: v_writelane_b32 v22, s50, 11 ; GFX12-NEXT: v_writelane_b32 v22, s51, 12 ; GFX12-NEXT: v_writelane_b32 v22, s52, 13 ; GFX12-NEXT: v_writelane_b32 v22, s53, 14 ; GFX12-NEXT: v_writelane_b32 v22, s54, 15 ; GFX12-NEXT: v_writelane_b32 v22, s55, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 s54, s58 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: v_readlane_b32 s55, v22, 16 ; GFX12-NEXT: v_readlane_b32 s54, v22, 15 ; GFX12-NEXT: v_readlane_b32 s53, v22, 14 ; GFX12-NEXT: v_readlane_b32 s52, v22, 13 ; GFX12-NEXT: v_readlane_b32 s51, v22, 12 ; GFX12-NEXT: v_readlane_b32 s50, v22, 11 ; GFX12-NEXT: v_readlane_b32 s49, v22, 10 ; GFX12-NEXT: v_readlane_b32 s48, v22, 9 ; GFX12-NEXT: v_readlane_b32 s39, v22, 8 ; GFX12-NEXT: v_readlane_b32 s38, v22, 7 ; GFX12-NEXT: v_readlane_b32 s37, v22, 6 ; GFX12-NEXT: v_readlane_b32 s36, v22, 5 ; GFX12-NEXT: v_readlane_b32 s35, v22, 4 ; GFX12-NEXT: v_readlane_b32 s34, v22, 3 ; GFX12-NEXT: v_readlane_b32 s33, v22, 2 ; GFX12-NEXT: v_readlane_b32 s31, v22, 1 ; GFX12-NEXT: v_readlane_b32 s30, v22, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v22, off, s32 offset:32768 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; 
GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca [4096 x i32], align 4, addrspace(5) call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) ; Force no SGPRs to be available for the carry-out of the vector add. %asm = call %asm.output3 asm sideeffect "; def $0, $1, $2, $3, $4, $5, $6, $7", "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={v[0:15]},={v[16:21]},={vcc}"() %s0 = extractvalue %asm.output3 %asm, 0 %s1 = extractvalue %asm.output3 %asm, 1 %s2 = extractvalue %asm.output3 %asm, 2 %s3 = extractvalue %asm.output3 %asm, 3 %s4 = extractvalue %asm.output3 %asm, 4 %v0 = extractvalue %asm.output3 %asm, 5 %v1 = extractvalue %asm.output3 %asm, 6 %vcc = extractvalue %asm.output3 %asm, 7 %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 128 ; scc is unavailable since it is a live input ({scc}) of the inline asm below. call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9", "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s54},{scc}"( <16 x i32> %s0, <16 x i32> %s1, <16 x i32> %s2, <8 x i32> %s3, <2 x i32> %s4, <16 x i32> %v0, <6 x i32> %v1, i64 %vcc, ptr addrspace(5) %alloca1.offset, i32 0) ; use of scc ret void } ; For gfx8/gfx9, this should enforce a budget of 24 VGPRs, and 60 SGPRs (4 ; are reserved at the end for xnack + vcc). ; Attribute groups #0 and #1 differ only in alignstack (64 vs. 16). attributes #0 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" } attributes #1 = { nounwind alignstack=16 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" }