diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll | 151 |
1 files changed, 78 insertions, 73 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll index e532dea..f807169 100644 --- a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll @@ -11,22 +11,20 @@ define void @test_i8load_v4i8store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG: ; %bb.0: ; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0 -; GCN-SDAG-NEXT: global_load_u8 v2, v[2:3], off -; GCN-SDAG-NEXT: global_load_u8 v3, v[4:5], off -; GCN-SDAG-NEXT: global_load_u8 v0, v[0:1], off +; GCN-SDAG-NEXT: global_load_u8 v6, v[2:3], off +; GCN-SDAG-NEXT: global_load_u8 v7, v[4:5], off +; GCN-SDAG-NEXT: global_load_u8 v10, v[0:1], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x2 ; GCN-SDAG-NEXT: s_wait_xcnt 0x0 -; GCN-SDAG-NEXT: v_lshlrev_b16 v1, 8, v2 +; GCN-SDAG-NEXT: v_lshlrev_b16 v0, 8, v6 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x1 -; GCN-SDAG-NEXT: v_lshlrev_b16 v2, 8, v3 +; GCN-SDAG-NEXT: v_lshlrev_b16 v1, 8, v7 +; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GCN-SDAG-NEXT: v_or_b32_e32 v1, v7, v1 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 -; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GCN-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: v_or_b32_e32 v1, v3, v2 -; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GCN-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GCN-SDAG-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_bitop2_b32 v0, v10, v0 bitop3:0x54 +; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GCN-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GCN-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GCN-SDAG-NEXT: global_store_b32 v[8:9], v0, off ; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31] @@ -35,13 +33,15 @@ define void @test_i8load_v4i8store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL: ; %bb.0: ; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0 -; GCN-GISEL-NEXT: global_load_u8 v0, v[0:1], off -; GCN-GISEL-NEXT: global_load_u8 v1, v[2:3], off -; GCN-GISEL-NEXT: global_load_u8 v2, v[4:5], off +; GCN-GISEL-NEXT: global_load_u8 v6, v[0:1], off +; GCN-GISEL-NEXT: global_load_u8 v7, v[2:3], off +; GCN-GISEL-NEXT: global_load_u8 v10, v[4:5], off ; GCN-GISEL-NEXT: s_wait_loadcnt 0x1 -; GCN-GISEL-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; GCN-GISEL-NEXT: s_wait_xcnt 0x2 +; GCN-GISEL-NEXT: v_lshl_or_b32 v0, v7, 8, v6 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 -; GCN-GISEL-NEXT: v_dual_lshlrev_b32 v1, 16, v2 :: v_dual_lshlrev_b32 v2, 24, v2 +; GCN-GISEL-NEXT: s_wait_xcnt 0x1 +; GCN-GISEL-NEXT: v_dual_lshlrev_b32 v1, 16, v10 :: v_dual_lshlrev_b32 v2, 24, v10 ; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GCN-GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 ; GCN-GISEL-NEXT: global_store_b32 v[8:9], v0, off @@ -64,21 +64,21 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2 ; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0 ; GCN-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off -; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off -; GCN-SDAG-NEXT: v_mov_b64_e32 v[8:9], 0 -; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 -; GCN-SDAG-NEXT: v_pk_add_u16 v10, v6, v2 -; GCN-SDAG-NEXT: v_pk_add_u16 v11, v7, v3 +; GCN-SDAG-NEXT: global_load_b128 v[8:11], v[2:3], off ; GCN-SDAG-NEXT: s_wait_xcnt 0x0 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], 12 +; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 +; GCN-SDAG-NEXT: v_pk_add_u16 v1, v6, v10 +; GCN-SDAG-NEXT: v_pk_add_u16 v12, v7, v11 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[6:7], 8 -; GCN-SDAG-NEXT: v_pk_add_u16 v4, v4, v0 -; GCN-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v10 -; GCN-SDAG-NEXT: v_pk_add_u16 v5, v5, v1 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 0 +; GCN-SDAG-NEXT: v_pk_add_u16 v5, v5, v9 +; GCN-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GCN-SDAG-NEXT: v_pk_add_u16 v4, v4, v8 ; GCN-SDAG-NEXT: s_clause 0x2 -; GCN-SDAG-NEXT: global_store_b16 v[2:3], v11, off -; GCN-SDAG-NEXT: global_store_b32 v[6:7], v10, off -; GCN-SDAG-NEXT: global_store_b64 v[8:9], v[4:5], off +; GCN-SDAG-NEXT: global_store_b16 v[2:3], v12, off +; GCN-SDAG-NEXT: global_store_b32 v[6:7], v1, off +; GCN-SDAG-NEXT: global_store_b64 v[10:11], v[4:5], off ; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31] ; ; GCN-GISEL-LABEL: test_v7i16_load_store: @@ -86,28 +86,29 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2 ; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0 ; GCN-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[2:3], off -; GCN-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[10:11], 2 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 4 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 6 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[16:17], 8 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[18:19], 10 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[20:21], 12 +; GCN-GISEL-NEXT: global_load_b128 v[8:11], v[2:3], off +; GCN-GISEL-NEXT: s_wait_xcnt 0x0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 2 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 4 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[16:17], 6 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[18:19], 8 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[20:21], 10 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[22:23], 12 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 -; GCN-GISEL-NEXT: v_pk_add_u16 v2, v6, v2 -; GCN-GISEL-NEXT: v_pk_add_u16 v4, v4, v0 -; GCN-GISEL-NEXT: v_pk_add_u16 v1, v5, v1 -; GCN-GISEL-NEXT: v_pk_add_u16 v3, v7, v3 +; GCN-GISEL-NEXT: v_pk_add_u16 v1, v6, v10 +; GCN-GISEL-NEXT: v_pk_add_u16 v4, v4, v8 +; GCN-GISEL-NEXT: v_pk_add_u16 v5, v5, v9 +; GCN-GISEL-NEXT: v_pk_add_u16 v6, v7, v11 ; GCN-GISEL-NEXT: s_clause 0x6 -; GCN-GISEL-NEXT: global_store_b16 v[8:9], v4, off -; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[10:11], v4, off -; GCN-GISEL-NEXT: global_store_b16 v[12:13], v1, off -; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[14:15], v1, off -; GCN-GISEL-NEXT: global_store_b16 v[16:17], v2, off -; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[18:19], v2, off -; GCN-GISEL-NEXT: global_store_b16 v[20:21], v3, off -; GCN-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GCN-GISEL-NEXT: global_store_b16 v[2:3], v4, off +; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[12:13], v4, off +; GCN-GISEL-NEXT: global_store_b16 v[14:15], v5, off +; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[16:17], v5, off +; GCN-GISEL-NEXT: global_store_b16 v[18:19], v1, off +; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[20:21], v1, off +; GCN-GISEL-NEXT: global_store_b16 v[22:23], v6, off +; GCN-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v1 ; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31] %vec1 = load <7 x i16>, ptr addrspace(1) %ptr1 %insert = insertelement <7 x i16> %vec1, i16 20, i32 4 @@ -253,8 +254,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: global_load_b128 v[22:25], v[0:1], off offset:32 ; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off offset:16 ; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off -; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64 -; GCN-SDAG-NEXT: v_mov_b64_e32 v[36:37], 0x70 +; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:64 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0x70 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[48:49], 48 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[38:39], 0x60 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[50:51], 32 @@ -262,14 +263,15 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: v_mov_b64_e32 v[66:67], 0 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[52:53], 0x50 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[54:55], 64 -; GCN-SDAG-NEXT: v_dual_mov_b32 v34, 0xc8 :: v_dual_mov_b32 v35, 0 +; GCN-SDAG-NEXT: s_wait_xcnt 0x0 +; GCN-SDAG-NEXT: v_dual_mov_b32 v0, 0xc8 :: v_dual_mov_b32 v1, 0 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x7 -; GCN-SDAG-NEXT: global_store_b128 v[36:37], v[6:9], off +; GCN-SDAG-NEXT: global_store_b128 v[2:3], v[6:9], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x6 ; GCN-SDAG-NEXT: global_store_b128 v[38:39], v[10:13], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x5 ; GCN-SDAG-NEXT: s_wait_xcnt 0x1 -; GCN-SDAG-NEXT: v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v37, v17 +; GCN-SDAG-NEXT: v_dual_mov_b32 v2, v16 :: v_dual_mov_b32 v3, v17 ; GCN-SDAG-NEXT: s_wait_xcnt 0x0 ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[12:13], v[12:13], v[12:13] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[10:11], v[10:11], v[10:11] @@ -286,8 +288,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[8:9], v[8:9], v[8:9] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[6:7], v[6:7], v[6:7] ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 -; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[50:51], v[2:3], v[2:3] -; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[48:49], v[0:1], v[0:1] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[50:51], v[36:37], v[36:37] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[48:49], v[34:35], v[34:35] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[16:17], v[16:17], v[16:17] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[14:15], 0xc8, v[14:15] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[24:25], 0x64, v[24:25] @@ -298,8 +300,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[20:21], v[20:21], v[20:21] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19] ; GCN-SDAG-NEXT: s_clause 0x1 -; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[34:37], off -; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[0:3], off +; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[0:3], off +; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[34:37], off ; GCN-SDAG-NEXT: s_clause 0x7 ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:96 ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:112 @@ -309,7 +311,7 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:48 ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[30:33], off ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[26:29], off offset:16 -; GCN-SDAG-NEXT: s_wait_xcnt 0x8 +; GCN-SDAG-NEXT: s_wait_xcnt 0x9 ; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v32 :: v_dual_mov_b32 v1, v33 ; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31] ; @@ -325,7 +327,7 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: global_load_b128 v[22:25], v[0:1], off offset:48 ; GCN-GISEL-NEXT: global_load_b128 v[26:29], v[0:1], off offset:96 ; GCN-GISEL-NEXT: global_load_b128 v[30:33], v[0:1], off offset:112 -; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64 +; GCN-GISEL-NEXT: global_load_b128 v[34:37], v[0:1], off offset:64 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[38:39], 0 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[48:49], 16 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[50:51], 32 @@ -333,7 +335,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: v_mov_b64_e32 v[66:67], 0x60 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[68:69], 0x70 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[54:55], 64 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[34:35], 0xc8 +; GCN-GISEL-NEXT: s_wait_xcnt 0x0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[0:1], 0xc8 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[64:65], 0x50 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x6 ; GCN-GISEL-NEXT: global_store_b128 v[38:39], v[10:13], off @@ -349,7 +352,7 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: global_store_b128 v[68:69], v[30:33], off ; GCN-GISEL-NEXT: s_wait_xcnt 0x5 ; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[12:13], v[12:13], v[12:13] -; GCN-GISEL-NEXT: v_mov_b64_e32 v[36:37], v[8:9] +; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], v[8:9] ; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[10:11], v[10:11], v[10:11] ; GCN-GISEL-NEXT: s_wait_xcnt 0x4 ; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[14:15], v[14:15], v[14:15] @@ -361,8 +364,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[22:23], v[22:23], v[22:23] ; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[24:25], v[24:25], v[24:25] ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 -; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[48:49], v[0:1], v[0:1] -; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[50:51], v[2:3], v[2:3] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[48:49], v[34:35], v[34:35] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[50:51], v[36:37], v[36:37] ; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[6:7], 0xc8, v[6:7] ; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[8:9], v[8:9], v[8:9] ; GCN-GISEL-NEXT: s_wait_xcnt 0x1 @@ -372,8 +375,8 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[30:31], v[30:31], v[30:31] ; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[32:33], v[32:33], v[32:33] ; GCN-GISEL-NEXT: s_clause 0x1 -; GCN-GISEL-NEXT: global_store_b128 v[54:55], v[0:3], off -; GCN-GISEL-NEXT: global_store_b128 v[64:65], v[34:37], off +; GCN-GISEL-NEXT: global_store_b128 v[54:55], v[34:37], off +; GCN-GISEL-NEXT: global_store_b128 v[64:65], v[0:3], off ; GCN-GISEL-NEXT: s_clause 0x7 ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[10:13], off ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[14:17], off offset:16 @@ -383,7 +386,7 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[6:9], off offset:80 ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[26:29], off offset:96 ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[30:33], off offset:112 -; GCN-GISEL-NEXT: s_wait_xcnt 0x9 +; GCN-GISEL-NEXT: s_wait_xcnt 0x8 ; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v12 :: v_dual_mov_b32 v1, v13 ; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31] %a = load <16 x i64>, ptr addrspace(1) %ptr_a, align 4 @@ -402,16 +405,17 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ; GCN-SDAG-LABEL: test_v7i16_load_store_kernel: ; GCN-SDAG: ; %bb.0: ; GCN-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 -; GCN-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GCN-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GCN-SDAG-NEXT: s_wait_xcnt 0x0 ; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 -; GCN-SDAG-NEXT: v_mov_b64_e32 v[8:9], 12 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 8 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[12:13], 0 ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0 ; GCN-SDAG-NEXT: s_clause 0x1 -; GCN-SDAG-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset -; GCN-SDAG-NEXT: global_load_b128 v[4:7], v4, s[2:3] scale_offset +; GCN-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset +; GCN-SDAG-NEXT: global_load_b128 v[4:7], v8, s[2:3] scale_offset +; GCN-SDAG-NEXT: s_wait_xcnt 0x0 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[8:9], 12 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 ; GCN-SDAG-NEXT: v_pk_add_u16 v3, v3, v7 ; GCN-SDAG-NEXT: v_pk_add_u16 v2, v2, v6 @@ -428,10 +432,9 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ; GCN-GISEL-LABEL: test_v7i16_load_store_kernel: ; GCN-GISEL: ; %bb.0: ; GCN-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 -; GCN-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GCN-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GCN-GISEL-NEXT: s_wait_xcnt 0x0 ; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[10:11], 2 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 4 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 6 @@ -440,8 +443,10 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ; GCN-GISEL-NEXT: v_mov_b64_e32 v[20:21], 12 ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0 ; GCN-GISEL-NEXT: s_clause 0x1 -; GCN-GISEL-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset -; GCN-GISEL-NEXT: global_load_b128 v[4:7], v4, s[2:3] scale_offset +; GCN-GISEL-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset +; GCN-GISEL-NEXT: global_load_b128 v[4:7], v8, s[2:3] scale_offset +; GCN-GISEL-NEXT: s_wait_xcnt 0x0 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 ; GCN-GISEL-NEXT: v_pk_add_u16 v0, v0, v4 ; GCN-GISEL-NEXT: v_pk_add_u16 v1, v1, v5 |