aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/load-local-i16.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-i16.ll115
1 files changed, 56 insertions, 59 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
index 062a985..b4c0b74 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -5734,20 +5734,19 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; SI-NEXT: v_mov_b32_e32 v0, s1
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: ds_read_b64 v[0:1], v0
-; SI-NEXT: v_mov_b32_e32 v9, s0
+; SI-NEXT: v_mov_b32_e32 v8, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v3, v1
-; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
-; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1
-; SI-NEXT: v_ashrrev_i32_e32 v1, 16, v1
-; SI-NEXT: v_bfe_i32 v3, v3, 0, 16
-; SI-NEXT: v_bfe_i32 v5, v0, 0, 16
-; SI-NEXT: v_bfe_i32 v7, v4, 0, 16
-; SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3
-; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5
-; SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7
-; SI-NEXT: ds_write2_b64 v9, v[3:4], v[1:2] offset0:2 offset1:3
-; SI-NEXT: ds_write2_b64 v9, v[5:6], v[7:8] offset1:1
+; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; SI-NEXT: v_ashrrev_i32_e32 v3, 31, v1
+; SI-NEXT: v_ashrrev_i32_e32 v2, 16, v1
+; SI-NEXT: v_bfe_i32 v4, v1, 0, 16
+; SI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; SI-NEXT: v_bfe_i32 v6, v5, 0, 16
+; SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
+; SI-NEXT: ds_write2_b64 v8, v[4:5], v[2:3] offset0:2 offset1:3
+; SI-NEXT: ds_write2_b64 v8, v[0:1], v[6:7] offset1:1
; SI-NEXT: s_endpgm
;
; VI-NO-DS128-LABEL: local_sextload_v4i16_to_v4i64:
@@ -5757,20 +5756,20 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1
; VI-NO-DS128-NEXT: ds_read_b64 v[0:1], v0
-; VI-NO-DS128-NEXT: v_mov_b32_e32 v8, s0
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; VI-NO-DS128-NEXT: v_bfe_i32 v4, v3, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; VI-NO-DS128-NEXT: ds_write2_b64 v8, v[6:7], v[4:5] offset0:2 offset1:3
-; VI-NO-DS128-NEXT: ds_write2_b64 v8, v[0:1], v[2:3] offset1:1
+; VI-NO-DS128-NEXT: v_bfe_i32 v1, v1, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v7, v4, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v3, v0, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v5, v2, 0, 16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v2, 31, v1
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v8, 31, v7
+; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s0
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5
+; VI-NO-DS128-NEXT: ds_write2_b64 v0, v[1:2], v[7:8] offset0:2 offset1:3
+; VI-NO-DS128-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; VI-NO-DS128-NEXT: s_endpgm
;
; GFX9-NO-DS128-LABEL: local_sextload_v4i16_to_v4i64:
@@ -5779,20 +5778,20 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NO-DS128-NEXT: ds_read_b64 v[0:1], v0
-; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, s0
+; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v9, s0
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v3, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v8, v[6:7], v[4:5] offset0:2 offset1:3
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v8, v[0:1], v[2:3] offset1:1
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v1, v1, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v7, v4, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v0, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v5, v2, 0, 16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v2, 31, v1
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v8, 31, v7
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v9, v[1:2], v[7:8] offset0:2 offset1:3
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v9, v[3:4], v[5:6] offset1:1
; GFX9-NO-DS128-NEXT: s_endpgm
;
; EG-LABEL: local_sextload_v4i16_to_v4i64:
@@ -5846,22 +5845,21 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; VI-DS128-NEXT: s_mov_b32 m0, -1
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-DS128-NEXT: v_mov_b32_e32 v0, s1
-; VI-DS128-NEXT: ds_read_b64 v[0:1], v0
+; VI-DS128-NEXT: ds_read_b64 v[1:2], v0
; VI-DS128-NEXT: v_mov_b32_e32 v8, s0
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; VI-DS128-NEXT: v_mov_b32_e32 v3, v1
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; VI-DS128-NEXT: v_bfe_i32 v4, v3, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v0, v2, 0, 16
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v4, v1, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v6, v3, 0, 16
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; VI-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:16
-; VI-DS128-NEXT: ds_write_b128 v8, v[0:3]
+; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:16
+; VI-DS128-NEXT: ds_write_b128 v8, v[4:7]
; VI-DS128-NEXT: s_endpgm
;
; GFX9-DS128-LABEL: local_sextload_v4i16_to_v4i64:
@@ -5869,22 +5867,21 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-DS128-NEXT: ds_read_b64 v[0:1], v0
+; GFX9-DS128-NEXT: ds_read_b64 v[1:2], v0
; GFX9-DS128-NEXT: v_mov_b32_e32 v8, s0
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DS128-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX9-DS128-NEXT: v_bfe_i32 v4, v3, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v0, v2, 0, 16
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
+; GFX9-DS128-NEXT: v_bfe_i32 v4, v1, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v6, v3, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:16
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[0:3]
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
+; GFX9-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:16
+; GFX9-DS128-NEXT: ds_write_b128 v8, v[4:7]
; GFX9-DS128-NEXT: s_endpgm
%load = load <4 x i16>, ptr addrspace(3) %in
%ext = sext <4 x i16> %load to <4 x i64>