about summary refs log tree commit diff
path: root/llvm/test/CodeGen/AMDGPU/select.f16.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/select.f16.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/select.f16.ll 72
1 files changed, 27 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll
index bbdfc76..da454ee 100644
--- a/llvm/test/CodeGen/AMDGPU/select.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll
@@ -852,19 +852,19 @@ define amdgpu_kernel void @select_v2f16(
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-TRUE16-NEXT: s_mov_b32 s22, s2
; GFX11-TRUE16-NEXT: s_mov_b32 s23, s3
-; GFX11-TRUE16-NEXT: s_mov_b32 s26, s2
-; GFX11-TRUE16-NEXT: s_mov_b32 s27, s3
; GFX11-TRUE16-NEXT: s_mov_b32 s18, s2
; GFX11-TRUE16-NEXT: s_mov_b32 s19, s3
+; GFX11-TRUE16-NEXT: s_mov_b32 s26, s2
+; GFX11-TRUE16-NEXT: s_mov_b32 s27, s3
; GFX11-TRUE16-NEXT: s_mov_b32 s6, s2
; GFX11-TRUE16-NEXT: s_mov_b32 s7, s3
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_mov_b32 s20, s12
; GFX11-TRUE16-NEXT: s_mov_b32 s21, s13
-; GFX11-TRUE16-NEXT: s_mov_b32 s24, s14
-; GFX11-TRUE16-NEXT: s_mov_b32 s25, s15
; GFX11-TRUE16-NEXT: s_mov_b32 s16, s10
; GFX11-TRUE16-NEXT: s_mov_b32 s17, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s24, s14
+; GFX11-TRUE16-NEXT: s_mov_b32 s25, s15
; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[20:23], 0
; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[16:19], 0
; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[24:27], 0
@@ -874,20 +874,18 @@ define amdgpu_kernel void @select_v2f16(
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
-; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v3
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s0, v5.l, v4.l
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v6.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v0.l, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, v1.l, s0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v7.l, v1.l, s0
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s8
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-TRUE16-NEXT: s_endpgm
;
@@ -1058,21 +1056,18 @@ define amdgpu_kernel void @select_v2f16_imm_a(
; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, 0.5, v0.l
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, 0.5, v0.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s0, 0x3900, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v5.l, v1.l, s0
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v5.l, v1.l, s0
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-TRUE16-NEXT: s_endpgm
;
@@ -1236,21 +1231,18 @@ define amdgpu_kernel void @select_v2f16_imm_b(
; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0.5, v0.l
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0.5, v0.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e64 s0, 0x3900, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v5.l, v1.l, s0
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v5.l, v1.l, s0
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-TRUE16-NEXT: s_endpgm
;
@@ -1402,8 +1394,6 @@ define amdgpu_kernel void @select_v2f16_imm_c(
; GFX11-TRUE16-NEXT: s_mov_b32 s19, s3
; GFX11-TRUE16-NEXT: s_mov_b32 s14, s2
; GFX11-TRUE16-NEXT: s_mov_b32 s15, s3
-; GFX11-TRUE16-NEXT: s_mov_b32 s22, s2
-; GFX11-TRUE16-NEXT: s_mov_b32 s23, s3
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_mov_b32 s16, s8
; GFX11-TRUE16-NEXT: s_mov_b32 s17, s9
@@ -1411,10 +1401,10 @@ define amdgpu_kernel void @select_v2f16_imm_c(
; GFX11-TRUE16-NEXT: s_mov_b32 s13, s7
; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[16:19], 0
; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-TRUE16-NEXT: s_mov_b32 s20, s10
-; GFX11-TRUE16-NEXT: s_mov_b32 s21, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s11
; GFX11-TRUE16-NEXT: s_mov_b32 s1, s5
-; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[12:15], 0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
@@ -1425,12 +1415,9 @@ define amdgpu_kernel void @select_v2f16_imm_c(
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v4.l, v3.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3800, v2.l, vcc_lo
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3900, v0.l, s0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x3900, v0.l, s0
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
-; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT: buffer_store_b32 v1, off, s[0:3], 0
; GFX11-TRUE16-NEXT: s_endpgm
;
; GFX11-FAKE16-LABEL: select_v2f16_imm_c:
@@ -1581,8 +1568,6 @@ define amdgpu_kernel void @select_v2f16_imm_d(
; GFX11-TRUE16-NEXT: s_mov_b32 s19, s3
; GFX11-TRUE16-NEXT: s_mov_b32 s14, s2
; GFX11-TRUE16-NEXT: s_mov_b32 s15, s3
-; GFX11-TRUE16-NEXT: s_mov_b32 s22, s2
-; GFX11-TRUE16-NEXT: s_mov_b32 s23, s3
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-TRUE16-NEXT: s_mov_b32 s16, s8
; GFX11-TRUE16-NEXT: s_mov_b32 s17, s9
@@ -1590,10 +1575,10 @@ define amdgpu_kernel void @select_v2f16_imm_d(
; GFX11-TRUE16-NEXT: s_mov_b32 s13, s7
; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[16:19], 0
; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-TRUE16-NEXT: s_mov_b32 s20, s10
-; GFX11-TRUE16-NEXT: s_mov_b32 s21, s11
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s10
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s11
; GFX11-TRUE16-NEXT: s_mov_b32 s1, s5
-; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[12:15], 0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
@@ -1604,12 +1589,9 @@ define amdgpu_kernel void @select_v2f16_imm_d(
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s0, v4.l, v3.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3800, v2.l, vcc_lo
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3900, v0.l, s0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x3900, v0.l, s0
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
-; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT: buffer_store_b32 v1, off, s[0:3], 0
; GFX11-TRUE16-NEXT: s_endpgm
;
; GFX11-FAKE16-LABEL: select_v2f16_imm_d: