aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll144
1 files changed, 43 insertions, 101 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
index afd0f01..6831380 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll
@@ -415,28 +415,18 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
; GISEL-GFX942-LABEL: memcpy_known:
; GISEL-GFX942: ; %bb.0:
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GISEL-GFX942-NEXT: s_load_dword s7, s[4:5], 0x54
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
-; GISEL-GFX942-NEXT: s_mov_b32 s7, 0
; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44
-; GISEL-GFX942-NEXT: s_mov_b32 s8, s7
+; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x2000
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX942-NEXT: s_mov_b32 s6, s1
+; GISEL-GFX942-NEXT: s_mov_b32 s8, s1
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
-; GISEL-GFX942-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9]
-; GISEL-GFX942-NEXT: s_mov_b32 s6, s3
-; GISEL-GFX942-NEXT: s_load_dword s3, s[4:5], 0x54
-; GISEL-GFX942-NEXT: s_mov_b32 s10, s7
-; GISEL-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11]
-; GISEL-GFX942-NEXT: s_mov_b32 s6, s13
-; GISEL-GFX942-NEXT: s_mov_b32 s4, s7
+; GISEL-GFX942-NEXT: s_mov_b32 s10, s3
+; GISEL-GFX942-NEXT: s_mov_b32 s4, s13
; GISEL-GFX942-NEXT: s_mov_b32 s5, s14
-; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
-; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
-; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
-; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
-; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x2000
; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16
; GISEL-GFX942-NEXT: .LBB0_1: ; %load-store-loop
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -491,25 +481,16 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34
; GISEL-GFX1100-NEXT: s_load_b32 s15, s[4:5], 0x54
-; GISEL-GFX1100-NEXT: s_mov_b32 s17, 0
-; GISEL-GFX1100-NEXT: s_mov_b32 s12, 0
-; GISEL-GFX1100-NEXT: s_mov_b32 s4, s17
-; GISEL-GFX1100-NEXT: s_mov_b32 s6, s17
-; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s12
-; GISEL-GFX1100-NEXT: s_mov_b32 s14, s17
+; GISEL-GFX1100-NEXT: s_mov_b32 s4, 0
+; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s4
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_mov_b32 s16, s1
+; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1
; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2
-; GISEL-GFX1100-NEXT: s_mov_b32 s2, s17
-; GISEL-GFX1100-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
-; GISEL-GFX1100-NEXT: s_mov_b32 s16, s3
-; GISEL-GFX1100-NEXT: s_mov_b32 s3, s10
-; GISEL-GFX1100-NEXT: s_or_b64 s[6:7], s[16:17], s[6:7]
-; GISEL-GFX1100-NEXT: s_mov_b32 s16, s9
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GISEL-GFX1100-NEXT: s_or_b64 s[12:13], s[16:17], s[2:3]
-; GISEL-GFX1100-NEXT: s_mov_b32 s16, s11
-; GISEL-GFX1100-NEXT: s_or_b64 s[14:15], s[16:17], s[14:15]
+; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3
+; GISEL-GFX1100-NEXT: s_mov_b32 s12, s9
+; GISEL-GFX1100-NEXT: s_mov_b32 s13, s10
+; GISEL-GFX1100-NEXT: s_mov_b32 s14, s11
; GISEL-GFX1100-NEXT: .LBB0_1: ; %load-store-loop
; GISEL-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v61, s0, v0
@@ -960,28 +941,18 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
; GISEL-GFX942-LABEL: memcpy_known_medium:
; GISEL-GFX942: ; %bb.0:
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GISEL-GFX942-NEXT: s_load_dword s7, s[4:5], 0x54
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
-; GISEL-GFX942-NEXT: s_mov_b32 s7, 0
; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44
-; GISEL-GFX942-NEXT: s_mov_b32 s8, s7
+; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x100
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX942-NEXT: s_mov_b32 s6, s1
+; GISEL-GFX942-NEXT: s_mov_b32 s8, s1
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
-; GISEL-GFX942-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9]
-; GISEL-GFX942-NEXT: s_mov_b32 s6, s3
-; GISEL-GFX942-NEXT: s_load_dword s3, s[4:5], 0x54
-; GISEL-GFX942-NEXT: s_mov_b32 s10, s7
-; GISEL-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11]
-; GISEL-GFX942-NEXT: s_mov_b32 s6, s13
-; GISEL-GFX942-NEXT: s_mov_b32 s4, s7
+; GISEL-GFX942-NEXT: s_mov_b32 s10, s3
+; GISEL-GFX942-NEXT: s_mov_b32 s4, s13
; GISEL-GFX942-NEXT: s_mov_b32 s5, s14
-; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
-; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
-; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
-; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
-; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x100
; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16
; GISEL-GFX942-NEXT: .LBB1_1: ; %load-store-loop
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -1036,25 +1007,16 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34
; GISEL-GFX1100-NEXT: s_load_b32 s15, s[4:5], 0x54
-; GISEL-GFX1100-NEXT: s_mov_b32 s17, 0
-; GISEL-GFX1100-NEXT: s_mov_b32 s12, 0
-; GISEL-GFX1100-NEXT: s_mov_b32 s4, s17
-; GISEL-GFX1100-NEXT: s_mov_b32 s6, s17
-; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s12
-; GISEL-GFX1100-NEXT: s_mov_b32 s14, s17
+; GISEL-GFX1100-NEXT: s_mov_b32 s4, 0
+; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s4
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_mov_b32 s16, s1
+; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1
; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2
-; GISEL-GFX1100-NEXT: s_mov_b32 s2, s17
-; GISEL-GFX1100-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
-; GISEL-GFX1100-NEXT: s_mov_b32 s16, s3
-; GISEL-GFX1100-NEXT: s_mov_b32 s3, s10
-; GISEL-GFX1100-NEXT: s_or_b64 s[6:7], s[16:17], s[6:7]
-; GISEL-GFX1100-NEXT: s_mov_b32 s16, s9
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GISEL-GFX1100-NEXT: s_or_b64 s[12:13], s[16:17], s[2:3]
-; GISEL-GFX1100-NEXT: s_mov_b32 s16, s11
-; GISEL-GFX1100-NEXT: s_or_b64 s[14:15], s[16:17], s[14:15]
+; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3
+; GISEL-GFX1100-NEXT: s_mov_b32 s12, s9
+; GISEL-GFX1100-NEXT: s_mov_b32 s13, s10
+; GISEL-GFX1100-NEXT: s_mov_b32 s14, s11
; GISEL-GFX1100-NEXT: .LBB1_1: ; %load-store-loop
; GISEL-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v61, s0, v0
@@ -1228,27 +1190,18 @@ define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) %src, ptr addrspa
; GISEL-GFX942: ; %bb.0:
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
-; GISEL-GFX942-NEXT: s_mov_b32 s7, 0
-; GISEL-GFX942-NEXT: s_mov_b32 s8, s7
-; GISEL-GFX942-NEXT: s_mov_b32 s10, s7
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX942-NEXT: s_mov_b32 s6, s1
+; GISEL-GFX942-NEXT: s_mov_b32 s8, s1
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
-; GISEL-GFX942-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9]
-; GISEL-GFX942-NEXT: s_mov_b32 s6, s3
-; GISEL-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11]
+; GISEL-GFX942-NEXT: s_mov_b32 s10, s3
; GISEL-GFX942-NEXT: v_mov_b32_e32 v4, s0
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
-; GISEL-GFX942-NEXT: s_load_dword s13, s[4:5], 0x54
-; GISEL-GFX942-NEXT: s_mov_b32 s4, s7
-; GISEL-GFX942-NEXT: s_mov_b32 s12, s7
+; GISEL-GFX942-NEXT: s_load_dword s7, s[4:5], 0x54
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX942-NEXT: s_mov_b32 s6, s1
+; GISEL-GFX942-NEXT: s_mov_b32 s4, s1
; GISEL-GFX942-NEXT: s_mov_b32 s5, s2
-; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; GISEL-GFX942-NEXT: s_mov_b32 s6, s3
-; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[12:13]
; GISEL-GFX942-NEXT: v_mov_b32_e32 v5, s0
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen
@@ -1261,35 +1214,24 @@ define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) %src, ptr addrspa
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_clause 0x1
; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34
-; GISEL-GFX1100-NEXT: s_mov_b32 s13, 0
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GISEL-GFX1100-NEXT: s_mov_b32 s8, s13
-; GISEL-GFX1100-NEXT: s_mov_b32 s6, s13
+; GISEL-GFX1100-NEXT: s_load_b32 s11, s[4:5], 0x34
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_mov_b32 s12, s1
-; GISEL-GFX1100-NEXT: s_mov_b32 s9, s2
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v4, s0
-; GISEL-GFX1100-NEXT: s_or_b64 s[0:1], s[12:13], s[8:9]
-; GISEL-GFX1100-NEXT: s_mov_b32 s12, s3
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GISEL-GFX1100-NEXT: s_or_b64 s[2:3], s[12:13], s[6:7]
-; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[0:3], 0 offen
+; GISEL-GFX1100-NEXT: s_mov_b32 s8, s1
+; GISEL-GFX1100-NEXT: s_mov_b32 s9, s2
+; GISEL-GFX1100-NEXT: s_mov_b32 s10, s3
+; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen
; GISEL-GFX1100-NEXT: s_clause 0x1
-; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
+; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x44
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x54
-; GISEL-GFX1100-NEXT: s_mov_b32 s4, s13
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-GFX1100-NEXT: s_mov_b32 s12, s9
-; GISEL-GFX1100-NEXT: s_mov_b32 s5, s10
-; GISEL-GFX1100-NEXT: v_mov_b32_e32 v5, s8
-; GISEL-GFX1100-NEXT: s_or_b64 s[4:5], s[12:13], s[4:5]
-; GISEL-GFX1100-NEXT: s_mov_b32 s12, s11
-; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GISEL-GFX1100-NEXT: s_or_b64 s[6:7], s[12:13], s[6:7]
+; GISEL-GFX1100-NEXT: v_mov_b32_e32 v5, s0
+; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1
+; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2
+; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen
-; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[0:3], 0 offen offset:16
+; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen offset:16
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen offset:16
; GISEL-GFX1100-NEXT: s_endpgm