diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll | 131 |
1 files changed, 65 insertions, 66 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll index 5d35adc..fd644a3 100644 --- a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll @@ -304,78 +304,79 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:4 ; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 ; GCN-SDAG-NEXT: s_clause 0x7 -; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:112 -; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:96 -; GCN-SDAG-NEXT: global_load_b128 v[18:21], v[0:1], off offset:80 +; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:112 +; GCN-SDAG-NEXT: global_load_b128 v[18:21], v[0:1], off offset:96 +; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:80 ; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:48 -; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off offset:32 -; GCN-SDAG-NEXT: global_load_b128 v[22:25], v[0:1], off offset:16 -; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off +; GCN-SDAG-NEXT: global_load_b128 v[14:17], v[0:1], off offset:32 +; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off offset:16 +; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off ; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64 -; GCN-SDAG-NEXT: v_mov_b64_e32 v[16:17], 0x70 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[24:25], 0x70 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[50:51], 0x60 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[52:53], 48 -; GCN-SDAG-NEXT: v_mov_b64_e32 v[38:39], 0x50 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[54:55], 32 -; GCN-SDAG-NEXT: v_mov_b64_e32 v[48:49], 64 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[40:41], 16 -; GCN-SDAG-NEXT: v_dual_mov_b32 v14, 0xc8 :: v_dual_mov_b32 v15, 0 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[38:39], 0x50 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[42:43], 0 +; GCN-SDAG-NEXT: v_mov_b64_e32 v[48:49], 64 +; GCN-SDAG-NEXT: v_dual_mov_b32 v22, 0xc8 :: v_dual_mov_b32 v23, 0 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x7 -; GCN-SDAG-NEXT: global_store_b128 v[16:17], v[6:9], off +; GCN-SDAG-NEXT: global_store_b128 v[24:25], v[10:13], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x6 -; GCN-SDAG-NEXT: global_store_b128 v[50:51], v[10:13], off +; GCN-SDAG-NEXT: global_store_b128 v[50:51], v[18:21], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x5 ; GCN-SDAG-NEXT: s_wait_xcnt 0x1 -; GCN-SDAG-NEXT: v_dual_mov_b32 v16, v20 :: v_dual_mov_b32 v17, v21 +; GCN-SDAG-NEXT: v_dual_mov_b32 v24, v8 :: v_dual_mov_b32 v25, v9 ; GCN-SDAG-NEXT: s_wait_xcnt 0x0 -; GCN-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[12:13], 0, v[12:13] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[10:11], v[10:11], 0, v[10:11] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, v[6:7] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[20:21], v[20:21], v[20:21] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[12:13], v[12:13], v[12:13] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[10:11], v[10:11], v[10:11] ; GCN-SDAG-NEXT: s_wait_loadcnt 0x4 ; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[34:37], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x3 -; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[30:33], off +; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[14:17], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x2 -; GCN-SDAG-NEXT: global_store_b128 v[40:41], v[22:25], off +; GCN-SDAG-NEXT: global_store_b128 v[40:41], v[26:29], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x1 -; GCN-SDAG-NEXT: global_store_b128 v[42:43], v[26:29], off +; GCN-SDAG-NEXT: global_store_b128 v[42:43], v[30:33], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 ; GCN-SDAG-NEXT: s_wait_xcnt 0x3 -; GCN-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[2:3], 0, v[2:3] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[0:1], 0, v[0:1] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[52:53], v[2:3], v[2:3] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[50:51], v[0:1], v[0:1] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[8:9], v[8:9], v[8:9] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[6:7], 0xc8, v[6:7] +; GCN-SDAG-NEXT: s_wait_xcnt 0x2 +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[16:17], 0x64, v[16:17] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[14:15], v[14:15], v[14:15] ; GCN-SDAG-NEXT: s_wait_xcnt 0x1 -; GCN-SDAG-NEXT: v_lshl_add_u64 v[24:25], v[24:25], 0, v[24:25] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[22:23], v[22:23], 0, v[22:23] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[28:29], v[28:29], v[28:29] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[26:27], v[26:27], v[26:27] ; GCN-SDAG-NEXT: s_wait_xcnt 0x0 -; GCN-SDAG-NEXT: v_lshl_add_u64 v[28:29], v[28:29], 0, v[28:29] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[26:27], v[26:27], 0, v[26:27] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[36:37], 0, v[36:37] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[34:35], v[34:35], 0, v[34:35] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[32:33], v[32:33], 0, 0x64 -; GCN-SDAG-NEXT: v_lshl_add_u64 v[30:31], v[30:31], 0, v[30:31] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[20:21], v[20:21], 0, v[20:21] -; GCN-SDAG-NEXT: v_lshl_add_u64 v[18:19], v[18:19], 0, 0xc8 +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[32:33], v[32:33], v[32:33] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[30:31], v[30:31], v[30:31] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[36:37], v[36:37], v[36:37] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[34:35], v[34:35], v[34:35] ; GCN-SDAG-NEXT: s_clause 0x1 -; GCN-SDAG-NEXT: global_store_b128 v[38:39], v[14:17], off +; GCN-SDAG-NEXT: global_store_b128 v[38:39], v[22:25], off ; GCN-SDAG-NEXT: global_store_b128 v[48:49], v[0:3], off ; GCN-SDAG-NEXT: s_clause 0x7 -; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:96 -; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:112 +; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:96 +; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:112 ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[50:53], off offset:64 -; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:80 -; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[30:33], off offset:32 +; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:80 +; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[14:17], off offset:32 ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[34:37], off offset:48 -; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[26:29], off -; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[22:25], off offset:16 +; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[30:33], off +; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[26:29], off offset:16 ; GCN-SDAG-NEXT: s_clause 0x3 ; GCN-SDAG-NEXT: scratch_load_b32 v43, off, s32 ; GCN-SDAG-NEXT: scratch_load_b32 v42, off, s32 offset:4 ; GCN-SDAG-NEXT: scratch_load_b32 v41, off, s32 offset:8 ; GCN-SDAG-NEXT: scratch_load_b32 v40, off, s32 offset:12 ; GCN-SDAG-NEXT: s_wait_xcnt 0xc -; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v28 :: v_dual_mov_b32 v1, v29 +; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v32 :: v_dual_mov_b32 v1, v33 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 ; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31] ; @@ -403,11 +404,11 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: v_mov_b64_e32 v[48:49], 16 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[50:51], 32 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[52:53], 48 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[42:43], 0x60 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[54:55], 64 +; GCN-GISEL-NEXT: v_mov_b64_e32 v[44:45], 0x70 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[34:35], 0xc8 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[40:41], 0x50 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[42:43], 0x60 -; GCN-GISEL-NEXT: v_mov_b64_e32 v[44:45], 0x70 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x6 ; GCN-GISEL-NEXT: global_store_b128 v[38:39], v[10:13], off ; GCN-GISEL-NEXT: s_wait_loadcnt 0x5 @@ -422,28 +423,28 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: global_store_b128 v[44:45], v[30:33], off ; GCN-GISEL-NEXT: v_mov_b64_e32 v[36:37], v[8:9] ; GCN-GISEL-NEXT: s_wait_xcnt 0x5 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[10:11], v[10:11], 0, v[10:11] -; GCN-GISEL-NEXT: v_lshl_add_u64 v[12:13], v[12:13], 0, v[12:13] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[10:11], v[10:11], v[10:11] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[12:13], v[12:13], v[12:13] ; GCN-GISEL-NEXT: s_wait_xcnt 0x4 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[14:15], v[14:15], 0, v[14:15] -; GCN-GISEL-NEXT: v_lshl_add_u64 v[16:17], v[16:17], 0, v[16:17] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[14:15], v[14:15], v[14:15] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[16:17], v[16:17], v[16:17] ; GCN-GISEL-NEXT: s_wait_xcnt 0x3 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[18:19], v[18:19], 0, v[18:19] -; GCN-GISEL-NEXT: v_lshl_add_u64 v[20:21], v[20:21], 0, 0x64 +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[20:21], 0x64, v[20:21] ; GCN-GISEL-NEXT: s_wait_xcnt 0x2 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[22:23], v[22:23], 0, v[22:23] -; GCN-GISEL-NEXT: v_lshl_add_u64 v[24:25], v[24:25], 0, v[24:25] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[22:23], v[22:23], v[22:23] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[24:25], v[24:25], v[24:25] ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[48:49], v[0:1], 0, v[0:1] -; GCN-GISEL-NEXT: v_lshl_add_u64 v[50:51], v[2:3], 0, v[2:3] -; GCN-GISEL-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, 0xc8 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[48:49], v[0:1], v[0:1] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[50:51], v[2:3], v[2:3] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[6:7], 0xc8, v[6:7] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[8:9], v[8:9], v[8:9] ; GCN-GISEL-NEXT: s_wait_xcnt 0x1 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[26:27], v[26:27], 0, v[26:27] -; GCN-GISEL-NEXT: v_lshl_add_u64 v[28:29], v[28:29], 0, v[28:29] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[26:27], v[26:27], v[26:27] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[28:29], v[28:29], v[28:29] ; GCN-GISEL-NEXT: s_wait_xcnt 0x0 -; GCN-GISEL-NEXT: v_lshl_add_u64 v[30:31], v[30:31], 0, v[30:31] -; GCN-GISEL-NEXT: v_lshl_add_u64 v[32:33], v[32:33], 0, v[32:33] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[30:31], v[30:31], v[30:31] +; GCN-GISEL-NEXT: v_add_nc_u64_e32 v[32:33], v[32:33], v[32:33] ; GCN-GISEL-NEXT: s_clause 0x1 ; GCN-GISEL-NEXT: global_store_b128 v[54:55], v[0:3], off ; GCN-GISEL-NEXT: global_store_b128 v[40:41], v[34:37], off @@ -482,17 +483,16 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ; GCN-SDAG-LABEL: test_v7i16_load_store_kernel: ; GCN-SDAG: ; %bb.0: ; GCN-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 -; GCN-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GCN-SDAG-NEXT: v_and_b32_e32 v4, 0x3ff, v0 ; GCN-SDAG-NEXT: s_wait_xcnt 0x0 ; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[8:9], 12 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[10:11], 8 ; GCN-SDAG-NEXT: v_mov_b64_e32 v[12:13], 0 -; GCN-SDAG-NEXT: v_lshlrev_b32_e32 v4, 4, v0 ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0 ; GCN-SDAG-NEXT: s_clause 0x1 -; GCN-SDAG-NEXT: global_load_b128 v[0:3], v4, s[0:1] -; GCN-SDAG-NEXT: global_load_b128 v[4:7], v4, s[2:3] +; GCN-SDAG-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GCN-SDAG-NEXT: global_load_b128 v[4:7], v4, s[2:3] scale_offset ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 ; GCN-SDAG-NEXT: v_pk_add_u16 v3, v3, v7 ; GCN-SDAG-NEXT: v_pk_add_u16 v2, v2, v6 @@ -509,21 +509,20 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ; GCN-GISEL-LABEL: test_v7i16_load_store_kernel: ; GCN-GISEL: ; %bb.0: ; GCN-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 -; GCN-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GCN-GISEL-NEXT: v_and_b32_e32 v4, 0x3ff, v0 ; GCN-GISEL-NEXT: s_wait_xcnt 0x0 ; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[10:11], 2 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[12:13], 4 -; GCN-GISEL-NEXT: v_lshlrev_b32_e32 v4, 4, v0 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[14:15], 6 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[16:17], 8 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[18:19], 10 ; GCN-GISEL-NEXT: v_mov_b64_e32 v[20:21], 12 ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0 ; GCN-GISEL-NEXT: s_clause 0x1 -; GCN-GISEL-NEXT: global_load_b128 v[0:3], v4, s[0:1] -; GCN-GISEL-NEXT: global_load_b128 v[4:7], v4, s[2:3] +; GCN-GISEL-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GCN-GISEL-NEXT: global_load_b128 v[4:7], v4, s[2:3] scale_offset ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 ; GCN-GISEL-NEXT: v_pk_add_u16 v0, v0, v4 ; GCN-GISEL-NEXT: v_pk_add_u16 v1, v1, v5 |