; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; FIXME: Fails expensive checks, should re-enable verifier, see issue #130884
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs=0 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs=0 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s

; Test that we can form v_pk_mov_b32 in certain shuffles when they
; originate from 32-bit physreg copy sequences.
;
; Every test defines its scalar inputs through an inline asm with fixed
; physical register constraints, rebuilds a vector from them with
; insertelement, and then permutes that vector with shufflevector. The
; digits in each function name spell out the shuffle mask.

; TODO: Test 16-bit paired cases

; Swap the two elements of a pair defined in the even-aligned VGPRs v4/v5.
define void @shufflevector_v2i32_10_physreg_even_vgpr_pair_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v2i32_10_physreg_even_vgpr_pair_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    global_store_dwordx2 v0, v[3:4], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_10_physreg_even_vgpr_pair_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, v5
; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
; GFX90A-NEXT:    global_store_dwordx2 v0, v[2:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_10_physreg_even_vgpr_pair_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v5
; GFX940-NEXT:    v_mov_b32_e32 v3, v4
; GFX940-NEXT:    global_store_dwordx2 v0, v[2:3], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={v4},={v5}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  store <2 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; Same swap, but the source pair starts at an odd register (v5/v6).
define void @shufflevector_v2i32_10_physreg_odd_vgpr_pair_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v2i32_10_physreg_odd_vgpr_pair_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v5, v6
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, v6
; GFX900-NEXT:    global_store_dwordx2 v0, v[4:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_10_physreg_odd_vgpr_pair_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v5, v6
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
; GFX90A-NEXT:    global_store_dwordx2 v0, v[4:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_10_physreg_odd_vgpr_pair_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v5, v6
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v4, v6
; GFX940-NEXT:    global_store_dwordx2 v0, v[4:5], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={v5},={v6}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  store <2 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; Swap of two even registers that are not adjacent (v4 and v6).
define void @shufflevector_v2i32_10_physreg_even_disjoint_even_vgpr_pair(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v2i32_10_physreg_even_disjoint_even_vgpr_pair:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v6
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v3, v6
; GFX900-NEXT:    global_store_dwordx2 v0, v[3:4], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_10_physreg_even_disjoint_even_vgpr_pair:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v6
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v7, v4
; GFX90A-NEXT:    global_store_dwordx2 v0, v[6:7], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_10_physreg_even_disjoint_even_vgpr_pair:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v6
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v7, v4
; GFX940-NEXT:    global_store_dwordx2 v0, v[6:7], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={v4},={v6}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  store <2 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; Splat of element 0 of the v4/v5 pair.
define void @shufflevector_v2i32_00_physreg_even_vgpr_pair_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v2i32_00_physreg_even_vgpr_pair_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    v_mov_b32_e32 v5, v4
; GFX900-NEXT:    global_store_dwordx2 v0, v[4:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_00_physreg_even_vgpr_pair_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    v_mov_b32_e32 v5, v4
; GFX90A-NEXT:    global_store_dwordx2 v0, v[4:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_00_physreg_even_vgpr_pair_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    v_mov_b32_e32 v5, v4
; GFX940-NEXT:    global_store_dwordx2 v0, v[4:5], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={v4},={v5}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> zeroinitializer
  store <2 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; Splat of element 1 of the v4/v5 pair.
define void @shufflevector_v2i32_11_physreg_even_vgpr_pair_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v2i32_11_physreg_even_vgpr_pair_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, v5
; GFX900-NEXT:    global_store_dwordx2 v0, v[5:6], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_11_physreg_even_vgpr_pair_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, v5
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    global_store_dwordx2 v0, v[2:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_11_physreg_even_vgpr_pair_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v5
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    global_store_dwordx2 v0, v[2:3], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={v4},={v5}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> splat (i32 1)
  store <2 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; Full reversal of a quad defined in v4-v7.
define void @shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5, v6, v7
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    v_mov_b32_e32 v2, v6
; GFX900-NEXT:    v_mov_b32_e32 v1, v7
; GFX900-NEXT:    global_store_dwordx4 v0, v[1:4], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5, v6, v7
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v10, v5
; GFX90A-NEXT:    v_mov_b32_e32 v9, v6
; GFX90A-NEXT:    v_mov_b32_e32 v8, v7
; GFX90A-NEXT:    v_mov_b32_e32 v11, v4
; GFX90A-NEXT:    global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5, v6, v7
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v10, v5
; GFX940-NEXT:    v_mov_b32_e32 v9, v6
; GFX940-NEXT:    v_mov_b32_e32 v8, v7
; GFX940-NEXT:    v_mov_b32_e32 v11, v4
; GFX940-NEXT:    global_store_dwordx4 v0, v[8:11], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32, i32, i32 } asm "; def $0, $1, $2, $3", "={v4},={v5},={v6},={v7}"()
  %asm.0 = extractvalue { i32, i32, i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32, i32, i32 } %asm, 1
  %asm.2 = extractvalue { i32, i32, i32, i32 } %asm, 2
  %asm.3 = extractvalue { i32, i32, i32, i32 } %asm, 3
  %insert0 = insertelement <4 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <4 x i32> %insert0, i32 %asm.1, i32 1
  %insert2 = insertelement <4 x i32> %insert1, i32 %asm.2, i32 2
  %insert3 = insertelement <4 x i32> %insert2, i32 %asm.3, i32 3
  %shuffle = shufflevector <4 x i32> %insert3, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; Swap the two elements inside each 64-bit half of the v4-v7 quad.
define void @shufflevector_v4i32_1032_physreg_even_vgpr_quad_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v4i32_1032_physreg_even_vgpr_quad_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5, v6, v7
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    v_mov_b32_e32 v5, v7
; GFX900-NEXT:    global_store_dwordx4 v0, v[3:6], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v4i32_1032_physreg_even_vgpr_quad_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5, v6, v7
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v8, v5
; GFX90A-NEXT:    v_mov_b32_e32 v11, v6
; GFX90A-NEXT:    v_mov_b32_e32 v10, v7
; GFX90A-NEXT:    v_mov_b32_e32 v9, v4
; GFX90A-NEXT:    global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v4i32_1032_physreg_even_vgpr_quad_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5, v6, v7
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v8, v5
; GFX940-NEXT:    v_mov_b32_e32 v11, v6
; GFX940-NEXT:    v_mov_b32_e32 v10, v7
; GFX940-NEXT:    v_mov_b32_e32 v9, v4
; GFX940-NEXT:    global_store_dwordx4 v0, v[8:11], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32, i32, i32 } asm "; def $0, $1, $2, $3", "={v4},={v5},={v6},={v7}"()
  %asm.0 = extractvalue { i32, i32, i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32, i32, i32 } %asm, 1
  %asm.2 = extractvalue { i32, i32, i32, i32 } %asm, 2
  %asm.3 = extractvalue { i32, i32, i32, i32 } %asm, 3
  %insert0 = insertelement <4 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <4 x i32> %insert0, i32 %asm.1, i32 1
  %insert2 = insertelement <4 x i32> %insert1, i32 %asm.2, i32 2
  %insert3 = insertelement <4 x i32> %insert2, i32 %asm.3, i32 3
  %shuffle = shufflevector <4 x i32> %insert3, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  store <4 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; Low half splats element 1; high half swaps elements 2 and 3.
define void @shufflevector_v4i32_1132_physreg_even_vgpr_quad_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v4i32_1132_physreg_even_vgpr_quad_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5, v6, v7
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    v_mov_b32_e32 v8, v6
; GFX900-NEXT:    v_mov_b32_e32 v6, v5
; GFX900-NEXT:    global_store_dwordx4 v0, v[5:8], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v4i32_1132_physreg_even_vgpr_quad_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5, v6, v7
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v0, v5
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[6:7], v[6:7] op_sel:[1,0]
; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v4i32_1132_physreg_even_vgpr_quad_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v8, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5, v6, v7
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v5
; GFX940-NEXT:    v_pk_mov_b32 v[2:3], v[6:7], v[6:7] op_sel:[1,0]
; GFX940-NEXT:    v_mov_b32_e32 v1, v5
; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32, i32, i32 } asm "; def $0, $1, $2, $3", "={v4},={v5},={v6},={v7}"()
  %asm.0 = extractvalue { i32, i32, i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32, i32, i32 } %asm, 1
  %asm.2 = extractvalue { i32, i32, i32, i32 } %asm, 2
  %asm.3 = extractvalue { i32, i32, i32, i32 } %asm, 3
  %insert0 = insertelement <4 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <4 x i32> %insert0, i32 %asm.1, i32 1
  %insert2 = insertelement <4 x i32> %insert1, i32 %asm.2, i32 2
  %insert3 = insertelement <4 x i32> %insert2, i32 %asm.3, i32 3
  %shuffle = shufflevector <4 x i32> %insert3, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 2>
  store <4 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; Reversed high half first, followed by the low half in its original order.
define void @shufflevector_v4i32_3201_physreg_even_vgpr_quad_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v4i32_3201_physreg_even_vgpr_quad_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5, v6, v7
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v3, v6
; GFX900-NEXT:    v_mov_b32_e32 v2, v7
; GFX900-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v4i32_3201_physreg_even_vgpr_quad_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5, v6, v7
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v3, v6
; GFX90A-NEXT:    v_mov_b32_e32 v2, v7
; GFX90A-NEXT:    global_store_dwordx4 v0, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v4i32_3201_physreg_even_vgpr_quad_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5, v6, v7
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v3, v6
; GFX940-NEXT:    v_mov_b32_e32 v2, v7
; GFX940-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32, i32, i32 } asm "; def $0, $1, $2, $3", "={v4},={v5},={v6},={v7}"()
  %asm.0 = extractvalue { i32, i32, i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32, i32, i32 } %asm, 1
  %asm.2 = extractvalue { i32, i32, i32, i32 } %asm, 2
  %asm.3 = extractvalue { i32, i32, i32, i32 } %asm, 3
  %insert0 = insertelement <4 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <4 x i32> %insert0, i32 %asm.1, i32 1
  %insert2 = insertelement <4 x i32> %insert1, i32 %asm.2, i32 2
  %insert3 = insertelement <4 x i32> %insert2, i32 %asm.3, i32 3
  %shuffle = shufflevector <4 x i32> %insert3, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
  store <4 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; SGPR variant: swap the even-aligned pair s4/s5 and hand the result to an
; inline asm use with an "s" constraint instead of storing it.
define void @shufflevector_v2i32_10_physreg_even_sgpr_pair_copy() {
; GFX900-LABEL: shufflevector_v2i32_10_physreg_even_sgpr_pair_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s4, s5
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s7, s4
; GFX900-NEXT:    s_mov_b32 s6, s5
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[6:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_10_physreg_even_sgpr_pair_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s4, s5
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s7, s4
; GFX90A-NEXT:    s_mov_b32 s6, s5
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[6:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_10_physreg_even_sgpr_pair_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s4, s5
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s1, s4
; GFX940-NEXT:    s_mov_b32 s0, s5
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={s4},={s5}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  call void asm sideeffect "; use $0", "s"(<2 x i32> %shuffle)
  ret void
}

; SGPR variant of the swap with the source pair starting at an odd register
; (s5/s6).
define void @shufflevector_v2i32_10_physreg_odd_sgpr_pair_copy() {
; GFX9-LABEL: shufflevector_v2i32_10_physreg_odd_sgpr_pair_copy:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s5, s6
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s7, s5
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s[6:7]
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={s5},={s6}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  call void asm sideeffect "; use $0", "s"(<2 x i32> %shuffle)
  ret void
}

; AGPR variant: swap the even-aligned pair a4/a5.
define void @shufflevector_v2i32_10_physreg_even_agpr_pair_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v2i32_10_physreg_even_agpr_pair_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def a4, a5
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_accvgpr_read_b32 v0, a5
; GFX900-NEXT:    v_accvgpr_read_b32 v1, a4
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_10_physreg_even_agpr_pair_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def a4, a5
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_accvgpr_mov_b32 a1, a4
; GFX90A-NEXT:    v_accvgpr_mov_b32 a0, a5
; GFX90A-NEXT:    global_store_dwordx2 v0, a[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_10_physreg_even_agpr_pair_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def a4, a5
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_accvgpr_mov_b32 a1, a4
; GFX940-NEXT:    v_accvgpr_mov_b32 a0, a5
; GFX940-NEXT:    global_store_dwordx2 v0, a[0:1], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={a4},={a5}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  store <2 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; AGPR variant of the swap with the source pair starting at an odd register
; (a5/a6).
define void @shufflevector_v2i32_10_physreg_odd_agpr_pair_copy(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v2i32_10_physreg_odd_agpr_pair_copy:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def a5, a6
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_accvgpr_read_b32 v0, a6
; GFX900-NEXT:    v_accvgpr_read_b32 v1, a5
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_10_physreg_odd_agpr_pair_copy:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def a5, a6
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_accvgpr_mov_b32 a4, a6
; GFX90A-NEXT:    global_store_dwordx2 v0, a[4:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_10_physreg_odd_agpr_pair_copy:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def a5, a6
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_accvgpr_mov_b32 a4, a6
; GFX940-NEXT:    global_store_dwordx2 v0, a[4:5], s[0:1]
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={a5},={a6}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  store <2 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret void
}

; The swapped vector has no users here; only element 0 of the asm result is
; returned.
define i32 @shufflevector_v2i32_10_physreg_even_vgpr_pair_copy_other_use_elt0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v2i32_10_physreg_even_vgpr_pair_copy_other_use_elt0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v0, v4
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_10_physreg_even_vgpr_pair_copy_other_use_elt0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_10_physreg_even_vgpr_pair_copy_other_use_elt0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v4
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={v4},={v5}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  ret i32 %asm.0 ; other use of copy
}

; The swapped vector has no users here; only element 1 of the asm result is
; returned.
define i32 @shufflevector_v2i32_10_physreg_even_vgpr_pair_copy_other_use_elt1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v2i32_10_physreg_even_vgpr_pair_copy_other_use_elt1:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v0, v5
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v2i32_10_physreg_even_vgpr_pair_copy_other_use_elt1:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v0, v5
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v2i32_10_physreg_even_vgpr_pair_copy_other_use_elt1:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v5
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32 } asm "; def $0, $1", "={v4},={v5}"()
  %asm.0 = extractvalue { i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32 } %asm, 1
  %insert0 = insertelement <2 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <2 x i32> %insert0, i32 %asm.1, i32 1
  %shuffle = shufflevector <2 x i32> %insert1, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
  ret i32 %asm.1 ; other use of copy
}

; Quad reversal stored to memory, with element 2 of the asm result also
; returned directly.
define i32 @shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy_other_use_elt(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy_other_use_elt:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v0, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v4, v5, v6, v7
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v9, v5
; GFX900-NEXT:    v_mov_b32_e32 v8, v6
; GFX900-NEXT:    v_mov_b32_e32 v10, v4
; GFX900-NEXT:    global_store_dwordx4 v0, v[7:10], s[16:17]
; GFX900-NEXT:    v_mov_b32_e32 v0, v6
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy_other_use_elt:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v4, v5, v6, v7
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v10, v5
; GFX90A-NEXT:    v_mov_b32_e32 v9, v6
; GFX90A-NEXT:    v_mov_b32_e32 v8, v7
; GFX90A-NEXT:    v_mov_b32_e32 v11, v4
; GFX90A-NEXT:    global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy_other_use_elt:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v0, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v4, v5, v6, v7
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v10, v5
; GFX940-NEXT:    v_mov_b32_e32 v9, v6
; GFX940-NEXT:    v_mov_b32_e32 v8, v7
; GFX940-NEXT:    v_mov_b32_e32 v11, v4
; GFX940-NEXT:    global_store_dwordx4 v0, v[8:11], s[0:1]
; GFX940-NEXT:    v_mov_b32_e32 v0, v6
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %asm = call { i32, i32, i32, i32 } asm "; def $0, $1, $2, $3", "={v4},={v5},={v6},={v7}"()
  %asm.0 = extractvalue { i32, i32, i32, i32 } %asm, 0
  %asm.1 = extractvalue { i32, i32, i32, i32 } %asm, 1
  %asm.2 = extractvalue { i32, i32, i32, i32 } %asm, 2
  %asm.3 = extractvalue { i32, i32, i32, i32 } %asm, 3
  %insert0 = insertelement <4 x i32> poison, i32 %asm.0, i32 0
  %insert1 = insertelement <4 x i32> %insert0, i32 %asm.1, i32 1
  %insert2 = insertelement <4 x i32> %insert1, i32 %asm.2, i32 2
  %insert3 = insertelement <4 x i32> %insert2, i32 %asm.3, i32 3
  %shuffle = shufflevector <4 x i32> %insert3, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %shuffle, ptr addrspace(1) %ptr, align 8
  ret i32 %asm.2
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX90APLUS: {{.*}}