aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll3539
1 files changed, 1652 insertions, 1887 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
index 8039e12..3253b49 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4p3_v3p3__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -100,36 +99,33 @@ define void @v_shuffle_v4p3_v3p3__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -153,12 +149,11 @@ define void @v_shuffle_v4p3_v3p3__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__4_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -196,36 +191,33 @@ define void @v_shuffle_v4p3_v3p3__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -239,47 +231,45 @@ define void @v_shuffle_v4p3_v3p3__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -293,46 +283,43 @@ define void @v_shuffle_v4p3_v3p3__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -346,16 +333,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -363,15 +348,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -379,15 +363,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -403,37 +386,35 @@ define void @v_shuffle_v4p3_v3p3__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -447,36 +428,37 @@ define void @v_shuffle_v4p3_v3p3__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -490,39 +472,37 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -536,49 +516,46 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -593,15 +570,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -609,16 +585,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -626,17 +601,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -651,15 +624,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -667,15 +639,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -683,16 +654,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -708,11 +678,11 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -721,11 +691,11 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -734,12 +704,12 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -753,41 +723,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -801,39 +770,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -847,50 +817,51 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -905,15 +876,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -921,15 +892,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -937,16 +908,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -961,15 +932,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -977,16 +948,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -994,16 +965,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1019,43 +990,41 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1069,42 +1038,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1118,42 +1088,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1181,13 +1152,12 @@ define void @v_shuffle_v4p3_v3p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1195,13 +1165,12 @@ define void @v_shuffle_v4p3_v3p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1263,42 +1232,39 @@ define void @v_shuffle_v4p3_v3p3__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__1_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1311,45 +1277,43 @@ define void @v_shuffle_v4p3_v3p3__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1376,13 +1340,12 @@ define void @v_shuffle_v4p3_v3p3__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1390,13 +1353,12 @@ define void @v_shuffle_v4p3_v3p3__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1410,16 +1372,15 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1427,15 +1388,14 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1444,16 +1404,15 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1469,16 +1428,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1486,17 +1444,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1504,17 +1460,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1529,16 +1483,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1546,16 +1498,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1563,16 +1513,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1587,16 +1535,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1604,17 +1551,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1622,17 +1567,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1647,16 +1590,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1664,17 +1606,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1682,17 +1622,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1707,16 +1645,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1726,15 +1663,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v9, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:8]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1744,15 +1679,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:8]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1767,15 +1700,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1783,16 +1716,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1800,16 +1732,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1824,16 +1755,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1841,17 +1771,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1859,17 +1787,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1883,17 +1809,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1901,16 +1825,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1918,16 +1841,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1941,17 +1863,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1959,16 +1880,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1976,17 +1896,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2000,53 +1918,51 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2060,53 +1976,51 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v8
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v8
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2120,17 +2034,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2138,16 +2051,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2155,17 +2067,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2272,11 +2183,11 @@ define void @v_shuffle_v4p3_v3p3__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2317,42 +2228,43 @@ define void @v_shuffle_v4p3_v3p3__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2411,16 +2323,15 @@ define void @v_shuffle_v4p3_v3p3__4_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2470,16 +2381,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2487,16 +2397,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2504,17 +2413,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2529,15 +2436,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2545,16 +2451,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2562,17 +2466,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2587,15 +2488,14 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2603,16 +2503,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2620,16 +2519,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2644,16 +2542,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2661,16 +2558,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2678,17 +2574,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2705,14 +2599,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2720,16 +2613,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2737,16 +2629,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2761,15 +2652,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2777,15 +2668,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2793,16 +2684,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2817,16 +2707,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2834,16 +2723,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2851,17 +2739,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2875,50 +2761,46 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2932,49 +2814,46 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2989,16 +2868,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3006,16 +2884,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3023,17 +2900,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3048,16 +2923,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3065,16 +2939,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3082,17 +2955,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3107,16 +2979,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3124,16 +2995,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3141,17 +3011,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3165,13 +3034,13 @@ define void @v_shuffle_v4p3_v3p3__u_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3255,14 +3124,13 @@ define void @v_shuffle_v4p3_v3p3__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3272,9 +3140,8 @@ define void @v_shuffle_v4p3_v3p3__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -3287,8 +3154,7 @@ define void @v_shuffle_v4p3_v3p3__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -3303,42 +3169,43 @@ define void @v_shuffle_v4p3_v3p3__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3351,13 +3218,13 @@ define void @v_shuffle_v4p3_v3p3__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3397,16 +3264,15 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3416,12 +3282,11 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -3434,12 +3299,12 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0]
; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3454,17 +3319,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3472,16 +3336,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3489,16 +3352,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3512,48 +3374,46 @@ define void @v_shuffle_v4p3_v3p3__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3568,15 +3428,14 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3584,17 +3443,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:8]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3602,17 +3460,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:8]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3626,48 +3483,46 @@ define void @v_shuffle_v4p3_v3p3__5_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3684,14 +3539,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3699,16 +3553,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3716,16 +3569,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3740,15 +3592,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3758,14 +3610,13 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3775,14 +3626,13 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3797,16 +3647,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3814,16 +3663,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3831,16 +3679,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3855,15 +3702,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3871,16 +3717,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3888,16 +3733,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3912,16 +3756,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[5:7]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3929,16 +3772,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:8]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3946,16 +3788,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:8]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v8
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3970,15 +3811,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3986,17 +3826,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4004,17 +3842,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4029,16 +3866,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4046,17 +3882,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:8]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v8
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4064,17 +3899,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:8]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v8
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4089,16 +3923,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4106,17 +3939,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4124,18 +3955,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4199,12 +4028,11 @@ define void @v_shuffle_v4p3_v3p3__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4241,36 +4069,33 @@ define void @v_shuffle_v4p3_v3p3__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4295,42 +4120,39 @@ define void @v_shuffle_v4p3_v3p3__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4344,45 +4166,43 @@ define void @v_shuffle_v4p3_v3p3__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4396,43 +4216,40 @@ define void @v_shuffle_v4p3_v3p3__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4447,15 +4264,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4463,17 +4280,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4481,17 +4297,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4506,15 +4321,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4522,16 +4337,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4539,17 +4353,16 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4564,16 +4377,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4581,17 +4393,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4599,17 +4410,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4625,43 +4435,41 @@ define void @v_shuffle_v4p3_v3p3__5_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4677,43 +4485,41 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4727,42 +4533,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4777,15 +4581,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4793,16 +4597,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4810,17 +4613,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4835,15 +4637,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4851,16 +4653,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4868,17 +4669,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4893,15 +4693,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4909,16 +4709,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4926,17 +4725,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4952,40 +4750,39 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5102,16 +4899,15 @@ define void @v_shuffle_v4p3_v3p3__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5161,16 +4957,15 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5178,16 +4973,15 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5195,17 +4989,15 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5268,11 +5060,11 @@ define void @v_shuffle_v4p3_v3p3__4_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5314,42 +5106,43 @@ define void @v_shuffle_v4p3_v3p3__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5363,41 +5156,40 @@ define void @v_shuffle_v4p3_v3p3__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5412,15 +5204,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5428,17 +5220,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5446,17 +5237,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5471,15 +5261,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5487,15 +5277,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5503,16 +5293,16 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5527,16 +5317,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5544,17 +5333,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5562,17 +5350,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5586,41 +5373,43 @@ define void @v_shuffle_v4p3_v3p3__5_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5634,42 +5423,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5683,41 +5473,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5732,16 +5521,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5749,16 +5537,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5766,17 +5553,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5791,16 +5577,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5808,17 +5593,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5826,17 +5610,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5851,16 +5634,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5868,16 +5650,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5885,17 +5666,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5909,40 +5689,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5956,13 +5739,13 @@ define void @v_shuffle_v4p3_v3p3__u_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__u_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6061,16 +5844,15 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6083,9 +5865,8 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -6102,8 +5883,7 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -6120,16 +5900,15 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6137,16 +5916,16 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6154,17 +5933,16 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6224,14 +6002,13 @@ define void @v_shuffle_v4p3_v3p3__4_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__4_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6241,9 +6018,8 @@ define void @v_shuffle_v4p3_v3p3__4_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0]
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -6256,8 +6032,7 @@ define void @v_shuffle_v4p3_v3p3__4_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -6273,39 +6048,40 @@ define void @v_shuffle_v4p3_v3p3__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6320,16 +6096,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6337,16 +6112,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6354,16 +6129,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6378,16 +6153,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6395,16 +6169,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6412,17 +6185,16 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6439,14 +6211,13 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6456,14 +6227,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6473,15 +6244,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6495,43 +6265,43 @@ define void @v_shuffle_v4p3_v3p3__5_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6545,39 +6315,43 @@ define void @v_shuffle_v4p3_v3p3__5_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6591,41 +6365,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6640,16 +6413,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6657,16 +6429,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6674,17 +6445,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6699,16 +6469,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6716,16 +6485,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6733,17 +6501,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6758,16 +6525,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6775,16 +6541,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6792,17 +6557,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6816,42 +6580,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6865,41 +6630,41 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()