diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.log2.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.log2.ll | 75 |
1 files changed, 61 insertions, 14 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll index c1ac74e..7ca72bf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -3830,20 +3830,67 @@ define float @v_log2_f32_from_fpext_bf16(bfloat %src) { ; GFX900-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX1100-LABEL: v_log2_f32_from_fpext_bf16: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX1100-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc_lo -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-NEXT: v_ldexp_f32 v0, v0, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX1100-SDAG-TRUE16-LABEL: v_log2_f32_from_fpext_bf16: +; GFX1100-SDAG-TRUE16: ; %bb.0: +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 +; GFX1100-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l +; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v1 +; GFX1100-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 32, vcc_lo +; GFX1100-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX1100-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-SDAG-TRUE16-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-SDAG-FAKE16-LABEL: v_log2_f32_from_fpext_bf16: +; GFX1100-SDAG-FAKE16: ; %bb.0: +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1100-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc_lo +; GFX1100-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-FAKE16-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-TRUE16-LABEL: v_log2_f32_from_fpext_bf16: +; GFX1100-GISEL-TRUE16: ; %bb.0: +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 +; GFX1100-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l +; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v1 +; GFX1100-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 32, vcc_lo +; GFX1100-GISEL-TRUE16-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX1100-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-GISEL-TRUE16-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-GISEL-FAKE16-LABEL: v_log2_f32_from_fpext_bf16: +; GFX1100-GISEL-FAKE16: ; %bb.0: +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1100-GISEL-FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX1100-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc_lo +; GFX1100-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-FAKE16-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-FAKE16-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_from_fpext_bf16: ; R600: ; %bb.0: |