diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll | 144 |
1 files changed, 43 insertions, 101 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll index afd0f01..6831380 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll @@ -415,28 +415,18 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7) ; GISEL-GFX942-LABEL: memcpy_known: ; GISEL-GFX942: ; %bb.0: ; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GISEL-GFX942-NEXT: s_load_dword s7, s[4:5], 0x54 ; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34 -; GISEL-GFX942-NEXT: s_mov_b32 s7, 0 ; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44 -; GISEL-GFX942-NEXT: s_mov_b32 s8, s7 +; GISEL-GFX942-NEXT: s_mov_b32 s16, 0 +; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x2000 ; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX942-NEXT: s_mov_b32 s6, s1 +; GISEL-GFX942-NEXT: s_mov_b32 s8, s1 ; GISEL-GFX942-NEXT: s_mov_b32 s9, s2 -; GISEL-GFX942-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GISEL-GFX942-NEXT: s_mov_b32 s6, s3 -; GISEL-GFX942-NEXT: s_load_dword s3, s[4:5], 0x54 -; GISEL-GFX942-NEXT: s_mov_b32 s10, s7 -; GISEL-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GISEL-GFX942-NEXT: s_mov_b32 s6, s13 -; GISEL-GFX942-NEXT: s_mov_b32 s4, s7 +; GISEL-GFX942-NEXT: s_mov_b32 s10, s3 +; GISEL-GFX942-NEXT: s_mov_b32 s4, s13 ; GISEL-GFX942-NEXT: s_mov_b32 s5, s14 -; GISEL-GFX942-NEXT: s_mov_b32 s16, 0 -; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] ; GISEL-GFX942-NEXT: s_mov_b32 s6, s15 -; GISEL-GFX942-NEXT: s_mov_b32 s2, s7 -; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3] -; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x2000 ; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16 ; GISEL-GFX942-NEXT: .LBB0_1: ; %load-store-loop ; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -491,25 +481,16 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7) ; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44 ; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34 ; GISEL-GFX1100-NEXT: s_load_b32 s15, s[4:5], 0x54 -; GISEL-GFX1100-NEXT: s_mov_b32 s17, 0 -; GISEL-GFX1100-NEXT: s_mov_b32 s12, 0 -; GISEL-GFX1100-NEXT: s_mov_b32 s4, s17 -; GISEL-GFX1100-NEXT: s_mov_b32 s6, s17 -; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s12 -; GISEL-GFX1100-NEXT: s_mov_b32 s14, s17 +; GISEL-GFX1100-NEXT: s_mov_b32 s4, 0 +; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s4 ; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_mov_b32 s16, s1 +; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1 ; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2 -; GISEL-GFX1100-NEXT: s_mov_b32 s2, s17 -; GISEL-GFX1100-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5] -; GISEL-GFX1100-NEXT: s_mov_b32 s16, s3 -; GISEL-GFX1100-NEXT: s_mov_b32 s3, s10 -; GISEL-GFX1100-NEXT: s_or_b64 s[6:7], s[16:17], s[6:7] -; GISEL-GFX1100-NEXT: s_mov_b32 s16, s9 -; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GISEL-GFX1100-NEXT: s_or_b64 s[12:13], s[16:17], s[2:3] -; GISEL-GFX1100-NEXT: s_mov_b32 s16, s11 -; GISEL-GFX1100-NEXT: s_or_b64 s[14:15], s[16:17], s[14:15] +; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3 +; GISEL-GFX1100-NEXT: s_mov_b32 s12, s9 +; GISEL-GFX1100-NEXT: s_mov_b32 s13, s10 +; GISEL-GFX1100-NEXT: s_mov_b32 s14, s11 ; GISEL-GFX1100-NEXT: .LBB0_1: ; %load-store-loop ; GISEL-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1 ; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v61, s0, v0 @@ -960,28 +941,18 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp ; GISEL-GFX942-LABEL: memcpy_known_medium: ; GISEL-GFX942: ; %bb.0: ; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GISEL-GFX942-NEXT: s_load_dword s7, s[4:5], 0x54 ; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34 -; GISEL-GFX942-NEXT: s_mov_b32 s7, 0 ; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44 -; GISEL-GFX942-NEXT: s_mov_b32 s8, s7 +; GISEL-GFX942-NEXT: s_mov_b32 s16, 0 +; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x100 ; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX942-NEXT: s_mov_b32 s6, s1 +; GISEL-GFX942-NEXT: s_mov_b32 s8, s1 ; GISEL-GFX942-NEXT: s_mov_b32 s9, s2 -; GISEL-GFX942-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GISEL-GFX942-NEXT: s_mov_b32 s6, s3 -; GISEL-GFX942-NEXT: s_load_dword s3, s[4:5], 0x54 -; GISEL-GFX942-NEXT: s_mov_b32 s10, s7 -; GISEL-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GISEL-GFX942-NEXT: s_mov_b32 s6, s13 -; GISEL-GFX942-NEXT: s_mov_b32 s4, s7 +; GISEL-GFX942-NEXT: s_mov_b32 s10, s3 +; GISEL-GFX942-NEXT: s_mov_b32 s4, s13 ; GISEL-GFX942-NEXT: s_mov_b32 s5, s14 -; GISEL-GFX942-NEXT: s_mov_b32 s16, 0 -; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] ; GISEL-GFX942-NEXT: s_mov_b32 s6, s15 -; GISEL-GFX942-NEXT: s_mov_b32 s2, s7 -; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3] -; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x100 ; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16 ; GISEL-GFX942-NEXT: .LBB1_1: ; %load-store-loop ; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -1036,25 +1007,16 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp ; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44 ; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34 ; GISEL-GFX1100-NEXT: s_load_b32 s15, s[4:5], 0x54 -; GISEL-GFX1100-NEXT: s_mov_b32 s17, 0 -; GISEL-GFX1100-NEXT: s_mov_b32 s12, 0 -; GISEL-GFX1100-NEXT: s_mov_b32 s4, s17 -; GISEL-GFX1100-NEXT: s_mov_b32 s6, s17 -; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s12 -; GISEL-GFX1100-NEXT: s_mov_b32 s14, s17 +; GISEL-GFX1100-NEXT: s_mov_b32 s4, 0 +; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s4 ; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_mov_b32 s16, s1 +; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1 ; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2 -; GISEL-GFX1100-NEXT: s_mov_b32 s2, s17 -; GISEL-GFX1100-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5] -; GISEL-GFX1100-NEXT: s_mov_b32 s16, s3 -; GISEL-GFX1100-NEXT: s_mov_b32 s3, s10 -; GISEL-GFX1100-NEXT: s_or_b64 s[6:7], s[16:17], s[6:7] -; GISEL-GFX1100-NEXT: s_mov_b32 s16, s9 -; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GISEL-GFX1100-NEXT: s_or_b64 s[12:13], s[16:17], s[2:3] -; GISEL-GFX1100-NEXT: s_mov_b32 s16, s11 -; GISEL-GFX1100-NEXT: s_or_b64 s[14:15], s[16:17], s[14:15] +; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3 +; GISEL-GFX1100-NEXT: s_mov_b32 s12, s9 +; GISEL-GFX1100-NEXT: s_mov_b32 s13, s10 +; GISEL-GFX1100-NEXT: s_mov_b32 s14, s11 ; GISEL-GFX1100-NEXT: .LBB1_1: ; %load-store-loop ; GISEL-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1 ; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v61, s0, v0 @@ -1228,27 +1190,18 @@ define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) %src, ptr addrspa ; GISEL-GFX942: ; %bb.0: ; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34 -; GISEL-GFX942-NEXT: s_mov_b32 s7, 0 -; GISEL-GFX942-NEXT: s_mov_b32 s8, s7 -; GISEL-GFX942-NEXT: s_mov_b32 s10, s7 ; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX942-NEXT: s_mov_b32 s6, s1 +; GISEL-GFX942-NEXT: s_mov_b32 s8, s1 ; GISEL-GFX942-NEXT: s_mov_b32 s9, s2 -; GISEL-GFX942-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GISEL-GFX942-NEXT: s_mov_b32 s6, s3 -; GISEL-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] +; GISEL-GFX942-NEXT: s_mov_b32 s10, s3 ; GISEL-GFX942-NEXT: v_mov_b32_e32 v4, s0 ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen ; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44 -; GISEL-GFX942-NEXT: s_load_dword s13, s[4:5], 0x54 -; GISEL-GFX942-NEXT: s_mov_b32 s4, s7 -; GISEL-GFX942-NEXT: s_mov_b32 s12, s7 +; GISEL-GFX942-NEXT: s_load_dword s7, s[4:5], 0x54 ; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX942-NEXT: s_mov_b32 s6, s1 +; GISEL-GFX942-NEXT: s_mov_b32 s4, s1 ; GISEL-GFX942-NEXT: s_mov_b32 s5, s2 -; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] ; GISEL-GFX942-NEXT: s_mov_b32 s6, s3 -; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[12:13] ; GISEL-GFX942-NEXT: v_mov_b32_e32 v5, s0 ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen @@ -1261,35 +1214,24 @@ define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) %src, ptr addrspa ; GISEL-GFX1100: ; %bb.0: ; GISEL-GFX1100-NEXT: s_clause 0x1 ; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34 -; GISEL-GFX1100-NEXT: s_mov_b32 s13, 0 -; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GISEL-GFX1100-NEXT: s_mov_b32 s8, s13 -; GISEL-GFX1100-NEXT: s_mov_b32 s6, s13 +; GISEL-GFX1100-NEXT: s_load_b32 s11, s[4:5], 0x34 ; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_mov_b32 s12, s1 -; GISEL-GFX1100-NEXT: s_mov_b32 s9, s2 ; GISEL-GFX1100-NEXT: v_mov_b32_e32 v4, s0 -; GISEL-GFX1100-NEXT: s_or_b64 s[0:1], s[12:13], s[8:9] -; GISEL-GFX1100-NEXT: s_mov_b32 s12, s3 -; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GISEL-GFX1100-NEXT: s_or_b64 s[2:3], s[12:13], s[6:7] -; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[0:3], 0 offen +; GISEL-GFX1100-NEXT: s_mov_b32 s8, s1 +; GISEL-GFX1100-NEXT: s_mov_b32 s9, s2 +; GISEL-GFX1100-NEXT: s_mov_b32 s10, s3 +; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen ; GISEL-GFX1100-NEXT: s_clause 0x1 -; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44 +; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 ; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x54 -; GISEL-GFX1100-NEXT: s_mov_b32 s4, s13 ; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-GFX1100-NEXT: s_mov_b32 s12, s9 -; GISEL-GFX1100-NEXT: s_mov_b32 s5, s10 -; GISEL-GFX1100-NEXT: v_mov_b32_e32 v5, s8 -; GISEL-GFX1100-NEXT: s_or_b64 s[4:5], s[12:13], s[4:5] -; GISEL-GFX1100-NEXT: s_mov_b32 s12, s11 -; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GISEL-GFX1100-NEXT: s_or_b64 s[6:7], s[12:13], s[6:7] +; GISEL-GFX1100-NEXT: v_mov_b32_e32 v5, s0 +; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1 +; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2 +; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) ; GISEL-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen -; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[0:3], 0 offen offset:16 +; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen offset:16 ; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) ; GISEL-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen offset:16 ; GISEL-GFX1100-NEXT: s_endpgm |
