diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll | 49 |
1 files changed, 10 insertions, 39 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll index 04f8ad8..6831380 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll @@ -450,38 +450,23 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7) ; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v1 ; GISEL-GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1 ; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) +; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) +; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[10:13], v63, s[4:7], 0 offen offset:32 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v63, s[4:7], 0 offen offset:48 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v63, s[4:7], 0 offen offset:64 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v63, s[4:7], 0 offen offset:80 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v63, s[4:7], 0 offen offset:96 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v63, s[4:7], 0 offen offset:112 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v63, s[4:7], 0 offen offset:128 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v63, s[4:7], 0 offen offset:144 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v63, s[4:7], 0 offen offset:160 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v63, s[4:7], 0 offen offset:176 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v63, s[4:7], 0 offen offset:192 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v63, s[4:7], 0 offen offset:208 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v63, s[4:7], 0 offen offset:224 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) -; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill ; GISEL-GFX942-NEXT: scratch_load_dwordx4 v[2:5], off, off ; 16-byte Folded Reload ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen offset:240 @@ -991,38 +976,23 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp ; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v1 ; GISEL-GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1 ; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) +; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) +; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[10:13], v63, s[4:7], 0 offen offset:32 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v63, s[4:7], 0 offen offset:48 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v63, s[4:7], 0 offen offset:64 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v63, s[4:7], 0 offen offset:80 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v63, s[4:7], 0 offen offset:96 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v63, s[4:7], 0 offen offset:112 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v63, s[4:7], 0 offen offset:128 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v63, s[4:7], 0 offen offset:144 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v63, s[4:7], 0 offen offset:160 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v63, s[4:7], 0 offen offset:176 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v63, s[4:7], 0 offen offset:192 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v63, s[4:7], 0 offen offset:208 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v63, s[4:7], 0 offen offset:224 -; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15) -; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill ; GISEL-GFX942-NEXT: scratch_load_dwordx4 v[2:5], off, off ; 16-byte Folded Reload ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen offset:240 @@ -1189,23 +1159,24 @@ define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) %src, ptr addrspa ; SDAG-GFX1100-NEXT: s_mov_b32 s9, s12 ; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-GFX1100-NEXT: s_mov_b32 s6, s3 +; SDAG-GFX1100-NEXT: v_mov_b32_e32 v4, s0 ; SDAG-GFX1100-NEXT: s_mov_b32 s8, s1 ; SDAG-GFX1100-NEXT: s_or_b64 s[10:11], s[6:7], s[12:13] ; SDAG-GFX1100-NEXT: s_mov_b32 s13, s2 -; SDAG-GFX1100-NEXT: v_mov_b32_e32 v4, s0 +; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; SDAG-GFX1100-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] +; SDAG-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen ; SDAG-GFX1100-NEXT: s_clause 0x1 ; SDAG-GFX1100-NEXT: s_load_b32 s13, s[4:5], 0x54 ; SDAG-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 ; SDAG-GFX1100-NEXT: s_mov_b32 s5, s12 ; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-GFX1100-NEXT: v_mov_b32_e32 v5, s0 -; SDAG-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen ; SDAG-GFX1100-NEXT: s_mov_b32 s4, s3 -; SDAG-GFX1100-NEXT: s_mov_b32 s3, s12 +; SDAG-GFX1100-NEXT: v_mov_b32_e32 v5, s0 ; SDAG-GFX1100-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13] ; SDAG-GFX1100-NEXT: s_mov_b32 s13, s2 ; SDAG-GFX1100-NEXT: s_mov_b32 s2, s1 +; SDAG-GFX1100-NEXT: s_mov_b32 s3, s12 ; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; SDAG-GFX1100-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13] ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) @@ -1249,12 +1220,12 @@ define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) %src, ptr addrspa ; GISEL-GFX1100-NEXT: s_mov_b32 s8, s1 ; GISEL-GFX1100-NEXT: s_mov_b32 s9, s2 ; GISEL-GFX1100-NEXT: s_mov_b32 s10, s3 +; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen ; GISEL-GFX1100-NEXT: s_clause 0x1 ; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 ; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x54 ; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v5, s0 -; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen ; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1 ; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2 ; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3 |
