diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll | 3539 |
1 files changed, 1652 insertions, 1887 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll index 8039e12..3253b49 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll @@ -58,12 +58,11 @@ define void @v_shuffle_v4p3_v3p3__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__1_u_u_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v1 -; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -100,36 +99,33 @@ define void @v_shuffle_v4p3_v3p3__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -153,12 +149,11 @@ define void @v_shuffle_v4p3_v3p3__4_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__4_u_u_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v1 -; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -196,36 +191,33 @@ define void @v_shuffle_v4p3_v3p3__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -239,47 +231,45 @@ define void @v_shuffle_v4p3_v3p3__5_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] -; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -293,46 +283,43 @@ define void @v_shuffle_v4p3_v3p3__5_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] -; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v5, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] -; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v5, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -346,16 +333,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_2_u_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[1:3] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -363,15 +348,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 +; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -379,15 +363,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -403,37 +386,35 @@ define void @v_shuffle_v4p3_v3p3__5_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 +; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -447,36 +428,37 @@ define void @v_shuffle_v4p3_v3p3__5_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v1 +; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v1 +; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -490,39 +472,37 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -536,49 +516,46 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v3, v2 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -593,15 +570,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -609,16 +585,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v5 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -626,17 +601,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -651,15 +624,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -667,15 +639,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 +; GFX90A-NEXT: v_mov_b32_e32 v5, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -683,16 +654,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -708,11 +678,11 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v1, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -721,11 +691,11 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v5, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -734,12 +704,12 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: v_mov_b32_e32 v5, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -753,41 +723,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v5, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -801,39 +770,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v5, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -847,50 +817,51 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v8, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v2 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[5:7] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v9, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[6:8] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v6 +; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v9, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[6:8] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -905,15 +876,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -921,15 +892,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -937,16 +908,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -961,15 +932,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -977,16 +948,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v6 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -994,16 +965,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1019,43 +990,41 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v2, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v2, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1069,42 +1038,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v1, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1118,42 +1088,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1181,13 +1152,12 @@ define void @v_shuffle_v4p3_v3p3__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v1, v0 -; GFX90A-NEXT: v_mov_b32_e32 v2, v0 -; GFX90A-NEXT: v_mov_b32_e32 v3, v0 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: v_mov_b32_e32 v1, v2 +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1195,13 +1165,12 @@ define void @v_shuffle_v4p3_v3p3__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1263,42 +1232,39 @@ define void @v_shuffle_v4p3_v3p3__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v5, 0 +; GFX900-NEXT: v_mov_b32_e32 v2, v0 +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__1_0_0_0: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_0_0_0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1311,45 +1277,43 @@ define void @v_shuffle_v4p3_v3p3__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1376,13 +1340,12 @@ define void @v_shuffle_v4p3_v3p3__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v1, v0 -; GFX90A-NEXT: v_mov_b32_e32 v2, v0 -; GFX90A-NEXT: v_mov_b32_e32 v3, v0 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: v_mov_b32_e32 v1, v2 +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1390,13 +1353,12 @@ define void @v_shuffle_v4p3_v3p3__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1410,16 +1372,15 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v2, v3 +; GFX900-NEXT: v_mov_b32_e32 v4, v3 +; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1427,15 +1388,14 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 ; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] @@ -1444,16 +1404,15 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1469,16 +1428,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v4, v3 +; GFX900-NEXT: v_mov_b32_e32 v5, v3 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1486,17 +1444,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1504,17 +1460,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1529,16 +1483,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v3, 0 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1546,16 +1498,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1563,16 +1513,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1587,16 +1535,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v7, 0 +; GFX900-NEXT: v_mov_b32_e32 v3, v5 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1604,17 +1551,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v5 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v5 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1622,17 +1567,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1647,16 +1590,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v7, 0 +; GFX900-NEXT: v_mov_b32_e32 v3, v6 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1664,17 +1606,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v6 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1682,17 +1622,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1707,16 +1645,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[4:6] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1726,15 +1663,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v9, 0 +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[6:8] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v8 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1744,15 +1679,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v9, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v0, v8 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1767,15 +1700,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1783,16 +1716,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1800,16 +1732,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1824,16 +1755,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1841,17 +1771,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1859,17 +1787,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1883,17 +1809,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_0: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v3, v2 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[5:7] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1901,16 +1825,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1918,16 +1841,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -1941,17 +1863,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_1_0: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v8, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[5:7] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v6 +; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -1959,16 +1880,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -1976,17 +1896,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2000,53 +1918,51 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v8, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[5:7] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v7 +; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v9, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[6:8] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v8 +; GFX90A-NEXT: v_mov_b32_e32 v5, v6 +; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v9, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[6:8] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2060,53 +1976,51 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v8, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v7, 0 +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[4:6] +; GFX900-NEXT: ; def v[5:7] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GFX900-NEXT: v_mov_b32_e32 v1, v6 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v9, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[6:8] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v8 -; GFX90A-NEXT: v_mov_b32_e32 v1, v8 -; GFX90A-NEXT: v_mov_b32_e32 v2, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17] +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v6 +; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] -; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v9, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[6:8] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: v_mov_b32_e32 v0, v8 -; GFX942-NEXT: v_mov_b32_e32 v1, v8 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2120,17 +2034,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_0: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v8, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v7, 0 +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v1 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[4:6] +; GFX900-NEXT: ; def v[5:7] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2138,16 +2051,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -2155,17 +2067,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2272,11 +2183,11 @@ define void @v_shuffle_v4p3_v3p3__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: v_mov_b32_e32 v2, v1 ; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2317,42 +2228,43 @@ define void @v_shuffle_v4p3_v3p3__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 ; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v1 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2411,16 +2323,15 @@ define void @v_shuffle_v4p3_v3p3__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v2, v3 +; GFX900-NEXT: v_mov_b32_e32 v4, v3 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2470,16 +2381,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v4, v3 +; GFX900-NEXT: v_mov_b32_e32 v5, v3 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2487,16 +2397,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 +; GFX90A-NEXT: v_mov_b32_e32 v4, v3 +; GFX90A-NEXT: v_mov_b32_e32 v5, v3 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v1 -; GFX90A-NEXT: v_mov_b32_e32 v3, v1 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -2504,17 +2413,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v1 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2529,15 +2436,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2545,16 +2451,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 +; GFX90A-NEXT: v_mov_b32_e32 v4, v5 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v1 -; GFX90A-NEXT: v_mov_b32_e32 v3, v1 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -2562,17 +2466,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v1 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2587,15 +2488,14 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2603,16 +2503,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: v_mov_b32_e32 v4, v5 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -2620,16 +2519,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2644,16 +2542,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v7, 0 +; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v3, v5 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[4:6] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2661,16 +2558,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v6 +; GFX90A-NEXT: v_mov_b32_e32 v4, v5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -2678,17 +2574,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2705,14 +2599,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2720,16 +2613,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 +; GFX90A-NEXT: v_mov_b32_e32 v4, v5 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -2737,16 +2629,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2761,15 +2652,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2777,15 +2668,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v1 +; GFX90A-NEXT: v_mov_b32_e32 v4, v5 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -2793,16 +2684,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2817,16 +2707,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -2834,16 +2723,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v5 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -2851,17 +2739,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2875,50 +2761,46 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v3, v2 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2932,49 +2814,46 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v7, 0 +; GFX900-NEXT: v_mov_b32_e32 v3, v2 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GFX900-NEXT: v_mov_b32_e32 v1, v6 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -2989,16 +2868,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v6 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3006,16 +2884,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v6 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3023,17 +2900,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3048,16 +2923,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[4:6] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GFX900-NEXT: v_mov_b32_e32 v1, v6 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3065,16 +2939,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3082,17 +2955,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3107,16 +2979,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3124,16 +2995,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v2, v5 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3141,17 +3011,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v5 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3165,13 +3034,13 @@ define void @v_shuffle_v4p3_v3p3__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__u_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3255,14 +3124,13 @@ define void @v_shuffle_v4p3_v3p3__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__1_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v1 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3272,9 +3140,8 @@ define void @v_shuffle_v4p3_v3p3__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -3287,8 +3154,7 @@ define void @v_shuffle_v4p3_v3p3__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] ; GFX942-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) @@ -3303,42 +3169,43 @@ define void @v_shuffle_v4p3_v3p3__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3351,13 +3218,13 @@ define void @v_shuffle_v4p3_v3p3__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__3_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3397,16 +3264,15 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[1:3] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v5, 0 +; GFX900-NEXT: v_mov_b32_e32 v2, v3 +; GFX900-NEXT: v_mov_b32_e32 v4, v3 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3416,12 +3282,11 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 ; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -3434,12 +3299,12 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] ; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] ; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3454,17 +3319,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[1:3] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v4, v3 +; GFX900-NEXT: v_mov_b32_e32 v5, v3 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3472,16 +3336,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3489,16 +3352,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3512,48 +3374,46 @@ define void @v_shuffle_v4p3_v3p3__5_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v5, v4 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3568,15 +3428,14 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v7, 0 +; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v4, v5 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[4:6] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3584,17 +3443,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: v_mov_b32_e32 v4, v6 +; GFX90A-NEXT: v_mov_b32_e32 v5, v6 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[6:8] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v8 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3602,17 +3460,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v8 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3626,48 +3483,46 @@ define void @v_shuffle_v4p3_v3p3__5_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v5, v4 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3684,14 +3539,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3699,16 +3553,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3716,16 +3569,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3740,15 +3592,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3758,14 +3610,13 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3775,14 +3626,13 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3797,16 +3647,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v5, v4 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3814,16 +3663,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3831,16 +3679,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3855,15 +3702,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3871,16 +3717,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v5, v4 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3888,16 +3733,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v4 ; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3912,16 +3756,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v8, 0 +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[5:7] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v7 -; GFX900-NEXT: v_mov_b32_e32 v1, v7 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v6 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3929,16 +3772,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[6:8] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v8 -; GFX90A-NEXT: v_mov_b32_e32 v1, v8 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v6 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -3946,16 +3788,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: v_mov_b32_e32 v0, v8 -; GFX942-NEXT: v_mov_b32_e32 v1, v8 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -3970,15 +3811,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v7, 0 +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[4:6] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GFX900-NEXT: v_mov_b32_e32 v1, v6 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -3986,17 +3826,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -4004,17 +3842,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4029,16 +3866,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v7, 0 +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[4:6] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GFX900-NEXT: v_mov_b32_e32 v1, v6 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -4046,17 +3882,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[6:8] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v8 -; GFX90A-NEXT: v_mov_b32_e32 v1, v8 -; GFX90A-NEXT: v_mov_b32_e32 v2, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v6 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -4064,17 +3899,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: v_mov_b32_e32 v0, v8 -; GFX942-NEXT: v_mov_b32_e32 v1, v8 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4089,16 +3923,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -4106,17 +3939,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -4124,18 +3955,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4199,12 +4028,11 @@ define void @v_shuffle_v4p3_v3p3__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__1_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v1 -; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -4241,36 +4069,33 @@ define void @v_shuffle_v4p3_v3p3__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v3, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v3, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4295,42 +4120,39 @@ define void @v_shuffle_v4p3_v3p3__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v5, 0 +; GFX900-NEXT: v_mov_b32_e32 v2, v0 +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__4_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4344,45 +4166,43 @@ define void @v_shuffle_v4p3_v3p3__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4396,43 +4216,40 @@ define void @v_shuffle_v4p3_v3p3__5_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v1, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4447,15 +4264,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -4463,17 +4280,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 ; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -4481,17 +4297,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 ; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4506,15 +4321,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -4522,16 +4337,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -4539,17 +4353,16 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4564,16 +4377,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[1:3] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -4581,17 +4393,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 ; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -4599,17 +4410,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 ; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4625,43 +4435,41 @@ define void @v_shuffle_v4p3_v3p3__5_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v1 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v5 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v1 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4677,43 +4485,41 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4727,42 +4533,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v1, v4 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4777,15 +4581,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -4793,16 +4597,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -4810,17 +4613,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4835,15 +4637,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -4851,16 +4653,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -4868,17 +4669,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4893,15 +4693,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -4909,16 +4709,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v0 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -4926,17 +4725,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -4952,40 +4750,39 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v0 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5102,16 +4899,15 @@ define void @v_shuffle_v4p3_v3p3__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v2, v3 +; GFX900-NEXT: v_mov_b32_e32 v4, v3 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -5161,16 +4957,15 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v3, v4 +; GFX900-NEXT: v_mov_b32_e32 v5, v4 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 -; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -5178,16 +4973,15 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v5 +; GFX90A-NEXT: v_mov_b32_e32 v4, v5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v1 -; GFX90A-NEXT: v_mov_b32_e32 v3, v1 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -5195,17 +4989,15 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v1 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5268,11 +5060,11 @@ define void @v_shuffle_v4p3_v3p3__4_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: v_mov_b32_e32 v2, v1 ; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -5314,42 +5106,43 @@ define void @v_shuffle_v4p3_v3p3__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v2, v1 ; GFX900-NEXT: v_mov_b32_e32 v3, v1 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v1 ; GFX90A-NEXT: v_mov_b32_e32 v3, v1 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5363,41 +5156,40 @@ define void @v_shuffle_v4p3_v3p3__5_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v1 -; GFX90A-NEXT: v_mov_b32_e32 v3, v1 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v1 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5412,15 +5204,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -5428,17 +5220,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v5 -; GFX90A-NEXT: v_mov_b32_e32 v3, v5 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -5446,17 +5237,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v5 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5471,15 +5261,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -5487,15 +5277,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -5503,16 +5293,16 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5527,16 +5317,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[1:3] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v4 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -5544,17 +5333,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v5 -; GFX90A-NEXT: v_mov_b32_e32 v3, v5 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -5562,17 +5350,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v2, v5 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5586,41 +5373,43 @@ define void @v_shuffle_v4p3_v3p3__5_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5634,42 +5423,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5683,41 +5473,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5732,16 +5521,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -5749,16 +5537,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v5 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -5766,17 +5553,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5791,16 +5577,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -5808,17 +5593,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v2, v3 -; GFX90A-NEXT: v_mov_b32_e32 v3, v5 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v5 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -5826,17 +5610,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v2, v3 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5851,16 +5634,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -5868,16 +5650,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v5 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -5885,17 +5666,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5909,40 +5689,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mov_b32_e32 v5, 0 +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v1, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v1 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v1 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -5956,13 +5739,13 @@ define void @v_shuffle_v4p3_v3p3__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__u_5_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6061,16 +5844,15 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v2, v4 +; GFX900-NEXT: v_mov_b32_e32 v3, v4 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6083,9 +5865,8 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 ; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 ; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -6102,8 +5883,7 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] ; GFX942-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) @@ -6120,16 +5900,15 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 +; GFX900-NEXT: v_mov_b32_e32 v3, v5 +; GFX900-NEXT: v_mov_b32_e32 v4, v5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6137,16 +5916,16 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, v6 +; GFX90A-NEXT: v_mov_b32_e32 v4, v6 +; GFX90A-NEXT: v_mov_b32_e32 v5, v6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -6154,17 +5933,16 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6224,14 +6002,13 @@ define void @v_shuffle_v4p3_v3p3__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__4_5_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v1 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 ; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6241,9 +6018,8 @@ define void @v_shuffle_v4p3_v3p3__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -6256,8 +6032,7 @@ define void @v_shuffle_v4p3_v3p3__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] ; GFX942-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) @@ -6273,39 +6048,40 @@ define void @v_shuffle_v4p3_v3p3__5_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v3, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6320,16 +6096,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6337,16 +6112,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -6354,16 +6129,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6378,16 +6153,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6395,16 +6169,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -6412,17 +6185,16 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6439,14 +6211,13 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[1:3] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6456,14 +6227,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v4 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -6473,15 +6244,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6495,43 +6265,43 @@ define void @v_shuffle_v4p3_v3p3__5_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v3 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 -; GFX90A-NEXT: v_mov_b32_e32 v2, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v0 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6545,39 +6315,43 @@ define void @v_shuffle_v4p3_v3p3__5_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v2 -; GFX900-NEXT: v_mov_b32_e32 v3, v2 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v1 +; GFX900-NEXT: v_mov_b32_e32 v4, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v1 +; GFX90A-NEXT: v_mov_b32_e32 v4, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6591,41 +6365,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v2 -; GFX90A-NEXT: v_mov_b32_e32 v1, v2 ; GFX90A-NEXT: v_mov_b32_e32 v3, v2 -; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: v_mov_b32_e32 v3, v2 -; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6640,16 +6413,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[4:6] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[4:6] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: v_mov_b32_e32 v2, v4 -; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6657,16 +6429,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[4:6] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_mov_b32_e32 v7, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v6 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -6674,17 +6445,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[4:6] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v7, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6699,16 +6469,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[3:5] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6716,16 +6485,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -6733,17 +6501,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6758,16 +6525,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[0:2] +; GFX900-NEXT: ; def v[2:4] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[3:5] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v6, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v5 -; GFX900-NEXT: v_mov_b32_e32 v1, v5 -; GFX900-NEXT: v_mov_b32_e32 v3, v5 -; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; @@ -6775,16 +6541,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[0:2] +; GFX90A-NEXT: ; def v[2:4] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v7, 0 +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[4:6] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v6 -; GFX90A-NEXT: v_mov_b32_e32 v1, v6 -; GFX90A-NEXT: v_mov_b32_e32 v3, v6 -; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; @@ -6792,17 +6557,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ; def v[2:4] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v6 -; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6816,42 +6580,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[2:4] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v4 -; GFX900-NEXT: v_mov_b32_e32 v1, v4 -; GFX900-NEXT: v_mov_b32_e32 v3, v4 -; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v0 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: v_mov_b32_e32 v3, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: v_mov_b32_e32 v4, v0 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() @@ -6865,41 +6630,41 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v6, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def v[1:3] +; GFX900-NEXT: ; def v[0:2] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] +; GFX900-NEXT: v_mov_b32_e32 v3, v2 +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v5, v2 +; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v6, 0 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def v[2:4] +; GFX90A-NEXT: ; def v[0:2] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, v4 -; GFX90A-NEXT: v_mov_b32_e32 v5, 0 -; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX90A-NEXT: v_mov_b32_e32 v1, v4 -; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0] +; GFX90A-NEXT: v_mov_b32_e32 v3, v2 +; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ; def v[0:2] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX942-NEXT: v_mov_b32_e32 v1, v4 -; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() |
