diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/load-constant-i1.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-constant-i1.ll | 357 |
1 files changed, 179 insertions, 178 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll index bfc01ef..d59f72a 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll @@ -8343,53 +8343,53 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshr_b32 s42, s5, 30 -; GFX6-NEXT: s_lshr_b32 s36, s5, 28 -; GFX6-NEXT: s_lshr_b32 s38, s5, 29 -; GFX6-NEXT: s_lshr_b32 s30, s5, 26 -; GFX6-NEXT: s_lshr_b32 s34, s5, 27 -; GFX6-NEXT: s_lshr_b32 s26, s5, 24 -; GFX6-NEXT: s_lshr_b32 s28, s5, 25 -; GFX6-NEXT: s_lshr_b32 s22, s5, 22 -; GFX6-NEXT: s_lshr_b32 s24, s5, 23 -; GFX6-NEXT: s_lshr_b32 s18, s5, 20 -; GFX6-NEXT: s_lshr_b32 s20, s5, 21 -; GFX6-NEXT: s_lshr_b32 s14, s5, 18 -; GFX6-NEXT: s_lshr_b32 s16, s5, 19 -; GFX6-NEXT: s_lshr_b32 s10, s5, 16 -; GFX6-NEXT: s_lshr_b32 s12, s5, 17 -; GFX6-NEXT: s_lshr_b32 s6, s5, 14 -; GFX6-NEXT: s_lshr_b32 s8, s5, 15 -; GFX6-NEXT: s_mov_b32 s40, s5 +; GFX6-NEXT: s_lshr_b32 s36, s4, 30 +; GFX6-NEXT: s_lshr_b32 s38, s4, 31 +; GFX6-NEXT: s_lshr_b32 s30, s4, 28 +; GFX6-NEXT: s_lshr_b32 s34, s4, 29 +; GFX6-NEXT: s_lshr_b32 s26, s4, 26 +; GFX6-NEXT: s_lshr_b32 s28, s4, 27 +; GFX6-NEXT: s_lshr_b32 s22, s4, 24 +; GFX6-NEXT: s_lshr_b32 s24, s4, 25 +; GFX6-NEXT: s_lshr_b32 s18, s4, 22 +; GFX6-NEXT: s_lshr_b32 s20, s4, 23 +; GFX6-NEXT: s_lshr_b32 s14, s4, 20 +; GFX6-NEXT: s_lshr_b32 s16, s4, 21 +; GFX6-NEXT: s_lshr_b32 s10, s4, 18 +; GFX6-NEXT: s_lshr_b32 s12, s4, 19 +; GFX6-NEXT: s_lshr_b32 s6, s4, 16 +; GFX6-NEXT: s_lshr_b32 s8, s4, 17 ; GFX6-NEXT: s_ashr_i32 s7, s5, 31 -; GFX6-NEXT: s_bfe_i64 s[44:45], s[40:41], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v4, s7 -; GFX6-NEXT: s_lshr_b32 s40, s5, 12 +; GFX6-NEXT: s_lshr_b32 s40, s4, 14 ; GFX6-NEXT: v_mov_b32_e32 v0, s44 ; GFX6-NEXT: v_mov_b32_e32 v1, s45 -; GFX6-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x10000 +; GFX6-NEXT: s_mov_b32 s44, s5 +; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v6, s44 ; GFX6-NEXT: v_mov_b32_e32 v7, s45 -; GFX6-NEXT: s_lshr_b32 s44, s5, 13 +; GFX6-NEXT: s_lshr_b32 s44, s4, 15 ; GFX6-NEXT: v_mov_b32_e32 v2, s42 ; GFX6-NEXT: v_mov_b32_e32 v3, s43 -; GFX6-NEXT: s_lshr_b32 s42, s5, 10 +; GFX6-NEXT: s_lshr_b32 s42, s4, 12 ; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v8, s36 ; GFX6-NEXT: v_mov_b32_e32 v9, s37 -; GFX6-NEXT: s_lshr_b32 s36, s5, 11 +; GFX6-NEXT: s_lshr_b32 s36, s4, 13 ; GFX6-NEXT: v_mov_b32_e32 v10, s38 ; GFX6-NEXT: v_mov_b32_e32 v11, s39 -; GFX6-NEXT: s_lshr_b32 s38, s5, 8 +; GFX6-NEXT: s_lshr_b32 s38, s4, 10 ; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v12, s30 ; GFX6-NEXT: v_mov_b32_e32 v13, s31 -; GFX6-NEXT: s_lshr_b32 s30, s5, 9 +; GFX6-NEXT: s_lshr_b32 s30, s4, 11 ; GFX6-NEXT: v_mov_b32_e32 v14, s34 ; GFX6-NEXT: v_mov_b32_e32 v15, s35 -; GFX6-NEXT: s_lshr_b32 s34, s5, 6 +; GFX6-NEXT: s_lshr_b32 s34, s4, 8 ; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v5, s7 @@ -8397,190 +8397,191 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s26 ; GFX6-NEXT: v_mov_b32_e32 v3, s27 -; GFX6-NEXT: s_lshr_b32 s26, s5, 7 +; GFX6-NEXT: s_lshr_b32 s26, s4, 9 ; GFX6-NEXT: v_mov_b32_e32 v4, s28 ; GFX6-NEXT: v_mov_b32_e32 v5, s29 -; GFX6-NEXT: s_lshr_b32 s28, s5, 4 +; GFX6-NEXT: s_lshr_b32 s28, s4, 6 ; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:480 +; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:240 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v8, s22 ; GFX6-NEXT: v_mov_b32_e32 v9, s23 -; GFX6-NEXT: s_lshr_b32 s22, s5, 5 +; GFX6-NEXT: s_lshr_b32 s22, s4, 7 ; GFX6-NEXT: v_mov_b32_e32 v10, s24 ; GFX6-NEXT: v_mov_b32_e32 v11, s25 -; GFX6-NEXT: s_lshr_b32 s24, s5, 2 +; GFX6-NEXT: s_lshr_b32 s24, s4, 4 ; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:464 +; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:224 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v12, s18 ; GFX6-NEXT: v_mov_b32_e32 v13, s19 -; GFX6-NEXT: s_lshr_b32 s18, s5, 3 +; GFX6-NEXT: s_lshr_b32 s18, s4, 5 ; GFX6-NEXT: v_mov_b32_e32 v14, s20 ; GFX6-NEXT: v_mov_b32_e32 v15, s21 -; GFX6-NEXT: s_lshr_b32 s20, s5, 1 +; GFX6-NEXT: s_lshr_b32 s20, s4, 2 ; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:448 +; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:208 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s14 ; GFX6-NEXT: v_mov_b32_e32 v3, s15 -; GFX6-NEXT: s_lshr_b32 s14, s4, 30 +; GFX6-NEXT: s_lshr_b32 s14, s4, 3 ; GFX6-NEXT: v_mov_b32_e32 v4, s16 ; GFX6-NEXT: v_mov_b32_e32 v5, s17 -; GFX6-NEXT: s_lshr_b32 s16, s4, 31 +; GFX6-NEXT: s_lshr_b32 s16, s4, 1 ; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:432 +; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:192 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v8, s10 ; GFX6-NEXT: v_mov_b32_e32 v9, s11 -; GFX6-NEXT: s_lshr_b32 s10, s4, 28 +; GFX6-NEXT: s_lshr_b32 s10, s5, 29 ; GFX6-NEXT: v_mov_b32_e32 v10, s12 ; GFX6-NEXT: v_mov_b32_e32 v11, s13 -; GFX6-NEXT: s_lshr_b32 s12, s4, 29 +; GFX6-NEXT: s_lshr_b32 s12, s5, 28 ; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:416 +; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v12, s6 ; GFX6-NEXT: v_mov_b32_e32 v13, s7 -; GFX6-NEXT: s_lshr_b32 s46, s4, 26 +; GFX6-NEXT: s_lshr_b32 s6, s5, 26 ; GFX6-NEXT: v_mov_b32_e32 v14, s8 ; GFX6-NEXT: v_mov_b32_e32 v15, s9 -; GFX6-NEXT: s_lshr_b32 s8, s4, 27 -; GFX6-NEXT: s_bfe_i64 s[6:7], s[44:45], 0x10000 +; GFX6-NEXT: s_lshr_b32 s8, s5, 27 +; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:400 +; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:160 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s40 ; GFX6-NEXT: v_mov_b32_e32 v3, s41 -; GFX6-NEXT: s_lshr_b32 s40, s4, 24 -; GFX6-NEXT: v_mov_b32_e32 v4, s6 -; GFX6-NEXT: v_mov_b32_e32 v5, s7 -; GFX6-NEXT: s_lshr_b32 s44, s4, 25 -; GFX6-NEXT: s_bfe_i64 s[6:7], s[36:37], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[36:37], s[42:43], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:384 +; GFX6-NEXT: s_lshr_b32 s40, s5, 25 +; GFX6-NEXT: v_mov_b32_e32 v4, s44 +; GFX6-NEXT: v_mov_b32_e32 v5, s45 +; GFX6-NEXT: s_lshr_b32 s44, s5, 24 +; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 +; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:144 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v8, s36 -; GFX6-NEXT: v_mov_b32_e32 v9, s37 -; GFX6-NEXT: s_lshr_b32 s36, s4, 22 -; GFX6-NEXT: v_mov_b32_e32 v10, s6 -; GFX6-NEXT: v_mov_b32_e32 v11, s7 -; GFX6-NEXT: s_lshr_b32 s42, s4, 23 -; GFX6-NEXT: s_bfe_i64 s[6:7], s[30:31], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[30:31], s[38:39], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:368 +; GFX6-NEXT: v_mov_b32_e32 v8, s42 +; GFX6-NEXT: v_mov_b32_e32 v9, s43 +; GFX6-NEXT: s_lshr_b32 s42, s5, 22 +; GFX6-NEXT: v_mov_b32_e32 v10, s36 +; GFX6-NEXT: v_mov_b32_e32 v11, s37 +; GFX6-NEXT: s_lshr_b32 s36, s5, 23 +; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 +; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v12, s30 -; GFX6-NEXT: v_mov_b32_e32 v13, s31 -; GFX6-NEXT: s_lshr_b32 s30, s4, 20 -; GFX6-NEXT: v_mov_b32_e32 v14, s6 -; GFX6-NEXT: v_mov_b32_e32 v15, s7 -; GFX6-NEXT: s_lshr_b32 s6, s4, 21 +; GFX6-NEXT: v_mov_b32_e32 v12, s38 +; GFX6-NEXT: v_mov_b32_e32 v13, s39 +; GFX6-NEXT: s_lshr_b32 s38, s5, 20 +; GFX6-NEXT: v_mov_b32_e32 v14, s30 +; GFX6-NEXT: v_mov_b32_e32 v15, s31 +; GFX6-NEXT: s_lshr_b32 s4, s5, 21 ; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:352 -; GFX6-NEXT: v_mov_b32_e32 v16, s34 -; GFX6-NEXT: v_mov_b32_e32 v17, s35 -; GFX6-NEXT: s_lshr_b32 s34, s4, 18 -; GFX6-NEXT: v_mov_b32_e32 v18, s26 -; GFX6-NEXT: v_mov_b32_e32 v19, s27 -; GFX6-NEXT: s_lshr_b32 s26, s4, 19 +; GFX6-NEXT: s_bfe_i64 s[30:31], s[34:35], 0x10000 +; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:112 +; GFX6-NEXT: s_waitcnt expcnt(0) +; GFX6-NEXT: v_mov_b32_e32 v2, s30 +; GFX6-NEXT: v_mov_b32_e32 v3, s31 +; GFX6-NEXT: s_lshr_b32 s30, s5, 18 +; GFX6-NEXT: v_mov_b32_e32 v4, s26 +; GFX6-NEXT: v_mov_b32_e32 v5, s27 +; GFX6-NEXT: s_lshr_b32 s26, s5, 19 ; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:336 +; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:96 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v8, s28 ; GFX6-NEXT: v_mov_b32_e32 v9, s29 -; GFX6-NEXT: s_lshr_b32 s28, s4, 16 +; GFX6-NEXT: s_lshr_b32 s28, s5, 17 ; GFX6-NEXT: v_mov_b32_e32 v10, s22 ; GFX6-NEXT: v_mov_b32_e32 v11, s23 -; GFX6-NEXT: s_lshr_b32 s22, s4, 17 +; GFX6-NEXT: s_lshr_b32 s22, s5, 16 +; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:320 +; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v12, s24 ; GFX6-NEXT: v_mov_b32_e32 v13, s25 -; GFX6-NEXT: s_lshr_b32 s24, s4, 14 -; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 +; GFX6-NEXT: s_lshr_b32 s24, s5, 14 ; GFX6-NEXT: v_mov_b32_e32 v14, s18 ; GFX6-NEXT: v_mov_b32_e32 v15, s19 -; GFX6-NEXT: s_lshr_b32 s18, s4, 15 -; GFX6-NEXT: v_mov_b32_e32 v2, s20 -; GFX6-NEXT: v_mov_b32_e32 v3, s21 -; GFX6-NEXT: s_lshr_b32 s20, s4, 12 -; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 +; GFX6-NEXT: s_lshr_b32 s18, s5, 15 +; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 +; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:64 +; GFX6-NEXT: v_mov_b32_e32 v16, s20 +; GFX6-NEXT: v_mov_b32_e32 v17, s21 +; GFX6-NEXT: s_lshr_b32 s20, s5, 12 ; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:304 +; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 +; GFX6-NEXT: v_mov_b32_e32 v18, s14 +; GFX6-NEXT: v_mov_b32_e32 v19, s15 +; GFX6-NEXT: s_lshr_b32 s14, s5, 13 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v16, s14 -; GFX6-NEXT: v_mov_b32_e32 v17, s15 -; GFX6-NEXT: s_lshr_b32 s14, s4, 13 -; GFX6-NEXT: v_mov_b32_e32 v18, s16 -; GFX6-NEXT: v_mov_b32_e32 v19, s17 -; GFX6-NEXT: s_lshr_b32 s16, s4, 10 +; GFX6-NEXT: v_mov_b32_e32 v2, s16 +; GFX6-NEXT: v_mov_b32_e32 v3, s17 +; GFX6-NEXT: s_lshr_b32 s16, s5, 10 ; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:288 +; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v8, s10 -; GFX6-NEXT: v_mov_b32_e32 v9, s11 -; GFX6-NEXT: s_lshr_b32 s10, s4, 11 -; GFX6-NEXT: v_mov_b32_e32 v10, s12 -; GFX6-NEXT: v_mov_b32_e32 v11, s13 -; GFX6-NEXT: s_lshr_b32 s12, s4, 8 +; GFX6-NEXT: v_mov_b32_e32 v8, s12 +; GFX6-NEXT: v_mov_b32_e32 v9, s13 +; GFX6-NEXT: s_lshr_b32 s12, s5, 11 +; GFX6-NEXT: v_mov_b32_e32 v10, s10 +; GFX6-NEXT: v_mov_b32_e32 v11, s11 +; GFX6-NEXT: s_lshr_b32 s10, s5, 8 ; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[38:39], s[46:47], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:272 +; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 +; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v12, s38 -; GFX6-NEXT: v_mov_b32_e32 v13, s39 -; GFX6-NEXT: s_lshr_b32 s38, s4, 9 +; GFX6-NEXT: v_mov_b32_e32 v12, s6 +; GFX6-NEXT: v_mov_b32_e32 v13, s7 +; GFX6-NEXT: s_lshr_b32 s6, s5, 9 ; GFX6-NEXT: v_mov_b32_e32 v14, s8 ; GFX6-NEXT: v_mov_b32_e32 v15, s9 -; GFX6-NEXT: s_lshr_b32 s8, s4, 6 -; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 +; GFX6-NEXT: s_lshr_b32 s8, s5, 6 +; GFX6-NEXT: s_bfe_i64 s[34:35], s[44:45], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256 +; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s40 -; GFX6-NEXT: v_mov_b32_e32 v1, s41 -; GFX6-NEXT: s_lshr_b32 s40, s4, 7 -; GFX6-NEXT: v_mov_b32_e32 v2, s44 -; GFX6-NEXT: v_mov_b32_e32 v3, s45 -; GFX6-NEXT: s_lshr_b32 s44, s4, 4 -; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 +; GFX6-NEXT: v_mov_b32_e32 v16, s34 +; GFX6-NEXT: v_mov_b32_e32 v17, s35 +; GFX6-NEXT: s_lshr_b32 s34, s5, 7 +; GFX6-NEXT: v_mov_b32_e32 v18, s40 +; GFX6-NEXT: v_mov_b32_e32 v19, s41 +; GFX6-NEXT: s_lshr_b32 s40, s5, 4 ; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240 +; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v16, s36 -; GFX6-NEXT: v_mov_b32_e32 v17, s37 -; GFX6-NEXT: s_lshr_b32 s36, s4, 5 -; GFX6-NEXT: v_mov_b32_e32 v18, s42 -; GFX6-NEXT: v_mov_b32_e32 v19, s43 -; GFX6-NEXT: s_lshr_b32 s42, s4, 2 -; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:224 +; GFX6-NEXT: v_mov_b32_e32 v0, s42 +; GFX6-NEXT: v_mov_b32_e32 v1, s43 +; GFX6-NEXT: s_lshr_b32 s42, s5, 5 +; GFX6-NEXT: v_mov_b32_e32 v2, s36 +; GFX6-NEXT: v_mov_b32_e32 v3, s37 +; GFX6-NEXT: s_lshr_b32 s36, s5, 2 +; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 +; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:480 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v8, s30 -; GFX6-NEXT: v_mov_b32_e32 v9, s31 -; GFX6-NEXT: s_lshr_b32 s30, s4, 3 -; GFX6-NEXT: s_lshr_b32 s4, s4, 1 -; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 +; GFX6-NEXT: v_mov_b32_e32 v8, s38 +; GFX6-NEXT: v_mov_b32_e32 v9, s39 +; GFX6-NEXT: s_lshr_b32 s38, s5, 3 +; GFX6-NEXT: s_lshr_b32 s44, s5, 1 ; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 @@ -8589,71 +8590,71 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 ; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 -; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 -; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:208 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 -; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 -; GFX6-NEXT: v_mov_b32_e32 v10, s6 -; GFX6-NEXT: v_mov_b32_e32 v11, s7 -; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160 -; GFX6-NEXT: s_waitcnt expcnt(2) -; GFX6-NEXT: v_mov_b32_e32 v0, s34 -; GFX6-NEXT: v_mov_b32_e32 v1, s35 +; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 +; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 +; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:464 +; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:448 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:432 +; GFX6-NEXT: v_mov_b32_e32 v10, s4 +; GFX6-NEXT: v_mov_b32_e32 v11, s5 +; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:416 +; GFX6-NEXT: s_waitcnt expcnt(1) +; GFX6-NEXT: v_mov_b32_e32 v0, s30 +; GFX6-NEXT: v_mov_b32_e32 v1, s31 ; GFX6-NEXT: v_mov_b32_e32 v2, s26 ; GFX6-NEXT: v_mov_b32_e32 v3, s27 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:400 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s28 -; GFX6-NEXT: v_mov_b32_e32 v1, s29 -; GFX6-NEXT: v_mov_b32_e32 v2, s22 -; GFX6-NEXT: v_mov_b32_e32 v3, s23 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 +; GFX6-NEXT: v_mov_b32_e32 v0, s22 +; GFX6-NEXT: v_mov_b32_e32 v1, s23 +; GFX6-NEXT: v_mov_b32_e32 v2, s28 +; GFX6-NEXT: v_mov_b32_e32 v3, s29 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:384 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s24 ; GFX6-NEXT: v_mov_b32_e32 v1, s25 ; GFX6-NEXT: v_mov_b32_e32 v2, s18 ; GFX6-NEXT: v_mov_b32_e32 v3, s19 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:368 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s20 ; GFX6-NEXT: v_mov_b32_e32 v1, s21 ; GFX6-NEXT: v_mov_b32_e32 v2, s14 ; GFX6-NEXT: v_mov_b32_e32 v3, s15 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:352 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s16 ; GFX6-NEXT: v_mov_b32_e32 v1, s17 -; GFX6-NEXT: v_mov_b32_e32 v2, s10 -; GFX6-NEXT: v_mov_b32_e32 v3, s11 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 +; GFX6-NEXT: v_mov_b32_e32 v2, s12 +; GFX6-NEXT: v_mov_b32_e32 v3, s13 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:336 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s12 -; GFX6-NEXT: v_mov_b32_e32 v1, s13 -; GFX6-NEXT: v_mov_b32_e32 v2, s38 -; GFX6-NEXT: v_mov_b32_e32 v3, s39 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 +; GFX6-NEXT: v_mov_b32_e32 v0, s10 +; GFX6-NEXT: v_mov_b32_e32 v1, s11 +; GFX6-NEXT: v_mov_b32_e32 v2, s6 +; GFX6-NEXT: v_mov_b32_e32 v3, s7 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:320 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s8 ; GFX6-NEXT: v_mov_b32_e32 v1, s9 -; GFX6-NEXT: v_mov_b32_e32 v2, s40 -; GFX6-NEXT: v_mov_b32_e32 v3, s41 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 +; GFX6-NEXT: v_mov_b32_e32 v2, s34 +; GFX6-NEXT: v_mov_b32_e32 v3, s35 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:304 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s44 -; GFX6-NEXT: v_mov_b32_e32 v1, s45 -; GFX6-NEXT: v_mov_b32_e32 v2, s36 -; GFX6-NEXT: v_mov_b32_e32 v3, s37 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 +; GFX6-NEXT: v_mov_b32_e32 v0, s40 +; GFX6-NEXT: v_mov_b32_e32 v1, s41 +; GFX6-NEXT: v_mov_b32_e32 v2, s42 +; GFX6-NEXT: v_mov_b32_e32 v3, s43 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:288 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s42 -; GFX6-NEXT: v_mov_b32_e32 v1, s43 -; GFX6-NEXT: v_mov_b32_e32 v2, s30 -; GFX6-NEXT: v_mov_b32_e32 v3, s31 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 -; GFX6-NEXT: v_mov_b32_e32 v8, s4 -; GFX6-NEXT: v_mov_b32_e32 v9, s5 -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s36 +; GFX6-NEXT: v_mov_b32_e32 v1, s37 +; GFX6-NEXT: v_mov_b32_e32 v2, s38 +; GFX6-NEXT: v_mov_b32_e32 v3, s39 +; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:272 +; GFX6-NEXT: v_mov_b32_e32 v8, s44 +; GFX6-NEXT: v_mov_b32_e32 v9, s45 +; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:256 ; GFX6-NEXT: s_endpgm ; ; GFX8-LABEL: constant_sextload_v64i1_to_v64i64: |