diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/select.f16.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/select.f16.ll | 72 |
1 files changed, 27 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll index bbdfc76..da454ee 100644 --- a/llvm/test/CodeGen/AMDGPU/select.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll @@ -852,19 +852,19 @@ define amdgpu_kernel void @select_v2f16( ; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-TRUE16-NEXT: s_mov_b32 s22, s2 ; GFX11-TRUE16-NEXT: s_mov_b32 s23, s3 -; GFX11-TRUE16-NEXT: s_mov_b32 s26, s2 -; GFX11-TRUE16-NEXT: s_mov_b32 s27, s3 ; GFX11-TRUE16-NEXT: s_mov_b32 s18, s2 ; GFX11-TRUE16-NEXT: s_mov_b32 s19, s3 +; GFX11-TRUE16-NEXT: s_mov_b32 s26, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 s27, s3 ; GFX11-TRUE16-NEXT: s_mov_b32 s6, s2 ; GFX11-TRUE16-NEXT: s_mov_b32 s7, s3 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_mov_b32 s20, s12 ; GFX11-TRUE16-NEXT: s_mov_b32 s21, s13 -; GFX11-TRUE16-NEXT: s_mov_b32 s24, s14 -; GFX11-TRUE16-NEXT: s_mov_b32 s25, s15 ; GFX11-TRUE16-NEXT: s_mov_b32 s16, s10 ; GFX11-TRUE16-NEXT: s_mov_b32 s17, s11 +; GFX11-TRUE16-NEXT: s_mov_b32 s24, s14 +; GFX11-TRUE16-NEXT: s_mov_b32 s25, s15 ; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[20:23], 0 ; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[16:19], 0 ; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[24:27], 0 @@ -874,20 +874,18 @@ define amdgpu_kernel void @select_v2f16( ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l ; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s0, v5.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v6.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v0.l, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, v1.l, s0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v7.l, v1.l, s0 ; GFX11-TRUE16-NEXT: s_mov_b32 s0, s8 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-TRUE16-NEXT: s_endpgm ; @@ -1058,21 +1056,18 @@ define amdgpu_kernel void @select_v2f16_imm_a( ; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, 0.5, v0.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, 0.5, v0.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s0, 0x3900, v3.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l ; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v5.l, v1.l, s0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v5.l, v1.l, s0 ; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-TRUE16-NEXT: s_endpgm ; @@ -1236,21 +1231,18 @@ define amdgpu_kernel void @select_v2f16_imm_b( ; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0.5, v0.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0.5, v0.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 ; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e64 s0, 0x3900, v3.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l ; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v5.l, v1.l, s0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v5.l, v1.l, s0 ; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-TRUE16-NEXT: s_endpgm ; @@ -1402,8 +1394,6 @@ define amdgpu_kernel void @select_v2f16_imm_c( ; GFX11-TRUE16-NEXT: s_mov_b32 s19, s3 ; GFX11-TRUE16-NEXT: s_mov_b32 s14, s2 ; GFX11-TRUE16-NEXT: s_mov_b32 s15, s3 -; GFX11-TRUE16-NEXT: s_mov_b32 s22, s2 -; GFX11-TRUE16-NEXT: s_mov_b32 s23, s3 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_mov_b32 s16, s8 ; GFX11-TRUE16-NEXT: s_mov_b32 s17, s9 @@ -1411,10 +1401,10 @@ define amdgpu_kernel void @select_v2f16_imm_c( ; GFX11-TRUE16-NEXT: s_mov_b32 s13, s7 ; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[16:19], 0 ; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s20, s10 -; GFX11-TRUE16-NEXT: s_mov_b32 s21, s11 +; GFX11-TRUE16-NEXT: s_mov_b32 s12, s10 +; GFX11-TRUE16-NEXT: s_mov_b32 s13, s11 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, s5 -; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0 +; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[12:15], 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) @@ -1425,12 +1415,9 @@ define amdgpu_kernel void @select_v2f16_imm_c( ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v4.l, v3.l ; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3800, v2.l, vcc_lo -; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3900, v0.l, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x3900, v0.l, s0 ; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1 -; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 +; GFX11-TRUE16-NEXT: buffer_store_b32 v1, off, s[0:3], 0 ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: select_v2f16_imm_c: @@ -1581,8 +1568,6 @@ define amdgpu_kernel void @select_v2f16_imm_d( ; GFX11-TRUE16-NEXT: s_mov_b32 s19, s3 ; GFX11-TRUE16-NEXT: s_mov_b32 s14, s2 ; GFX11-TRUE16-NEXT: s_mov_b32 s15, s3 -; GFX11-TRUE16-NEXT: s_mov_b32 s22, s2 -; GFX11-TRUE16-NEXT: s_mov_b32 s23, s3 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_mov_b32 s16, s8 ; GFX11-TRUE16-NEXT: s_mov_b32 s17, s9 @@ -1590,10 +1575,10 @@ define amdgpu_kernel void @select_v2f16_imm_d( ; GFX11-TRUE16-NEXT: s_mov_b32 s13, s7 ; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[16:19], 0 ; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s20, s10 -; GFX11-TRUE16-NEXT: s_mov_b32 s21, s11 +; GFX11-TRUE16-NEXT: s_mov_b32 s12, s10 +; GFX11-TRUE16-NEXT: s_mov_b32 s13, s11 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, s5 -; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0 +; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[12:15], 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) @@ -1604,12 +1589,9 @@ define amdgpu_kernel void @select_v2f16_imm_d( ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s0, v4.l, v3.l ; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3800, v2.l, vcc_lo -; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3900, v0.l, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x3900, v0.l, s0 ; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1 -; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 +; GFX11-TRUE16-NEXT: buffer_store_b32 v1, off, s[0:3], 0 ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: select_v2f16_imm_d: |