; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-SDAG,SI-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-SDAG,VI-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-SDAG,GFX900-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s

; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
; RUN: llc -mtriple=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s

; Scalar f32 case. %in is a kernel argument, it is passed to llvm.log2.f32 and
; the result is stored through %out.
;
; In the expected output for each run line the input is compared against
; 0x800000 (1.175494e-38). That compare selects a multiplier of either 1.0 or
; 0x4f800000 (4.294967e+09) which is applied before the log instruction, and
; an offset of either 0 or 0x42000000 (32.0) which is subtracted from the log
; result.
;
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
; SI-SDAG-LABEL: s_log2_f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x4f800000
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
; SI-SDAG-NEXT:    v_mul_f32_e32 v1, s2, v1
; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
; SI-SDAG-NEXT:    s_mov_b32 s2, -1
; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: s_log2_f32:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
; SI-GISEL-NEXT:    v_mul_f32_e32 v0, s2, v0
; SI-GISEL-NEXT:    v_log_f32_e32 v0, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT:    s_mov_b32 s2, -1
; SI-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: s_log2_f32:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dword s2, s[0:1], 0x2c
; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x4f800000
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
; VI-SDAG-NEXT:    v_mul_f32_e32 v1, s2, v1
; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: s_log2_f32:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x2c
; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
; VI-GISEL-NEXT:    v_mul_f32_e32 v0, s2, v0
; VI-GISEL-NEXT:    v_log_f32_e32 v0, v0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX900-SDAG-LABEL: s_log2_f32:
; GFX900-SDAG:       ; %bb.0:
; GFX900-SDAG-NEXT:    s_load_dword s4, s[0:1], 0x2c
; GFX900-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x4f800000
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, s4, v1
; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
; GFX900-SDAG-NEXT:    global_store_dword v2, v0, s[2:3]
; GFX900-SDAG-NEXT:    s_endpgm
;
; GFX900-GISEL-LABEL: s_log2_f32:
; GFX900-GISEL:       ; %bb.0:
; GFX900-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x2c
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
; GFX900-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX900-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
; GFX900-GISEL-NEXT:    v_mul_f32_e32 v0, s2, v0
; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX900-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX900-GISEL-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: s_log2_f32:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b32 s2, s[0:1], 0x2c
; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s3, 0x800000, s2
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x42000000, s3
; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, s2, v1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
; GFX1100-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX1100-SDAG-NEXT:    s_nop 0
; GFX1100-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: s_log2_f32:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b32 s2, s[0:1], 0x2c
; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s3, 0x800000, s2
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s3
; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, s2, v0
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT:    v_dual_sub_f32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-GISEL-NEXT:    s_nop 0
; GFX1100-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT:    s_endpgm
;
; R600-LABEL: s_log2_f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 10, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     SETGT * T0.W, literal.x, KC0[2].Z,
; R600-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
; R600-NEXT:     CNDE * T1.W, PV.W, 1.0, literal.x,
; R600-NEXT:    1333788672(4.294967e+09), 0(0.000000e+00)
; R600-NEXT:     MUL_IEEE T1.W, KC0[2].Z, PV.W,
; R600-NEXT:     CNDE * T0.W, T0.W, 0.0, literal.x,
; R600-NEXT:    1107296256(3.200000e+01), 0(0.000000e+00)
; R600-NEXT:     LOG_IEEE * T0.X, PV.W,
; R600-NEXT:     ADD T0.X, PS, -T0.W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log2_f32:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 13, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:     SETGT * T0.W, literal.x, KC0[2].Z,
; CM-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
; CM-NEXT:     CNDE * T1.W, PV.W, 1.0, literal.x,
; CM-NEXT:    1333788672(4.294967e+09), 0(0.000000e+00)
; CM-NEXT:     CNDE T0.Z, T0.W, 0.0, literal.x,
; CM-NEXT:     MUL_IEEE * T0.W, KC0[2].Z, PV.W,
; CM-NEXT:    1107296256(3.200000e+01), 0(0.000000e+00)
; CM-NEXT:     LOG_IEEE T0.X, T0.W,
; CM-NEXT:     LOG_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE * T0.W (MASKED), T0.W,
; CM-NEXT:     ADD * T0.X, PV.X, -T0.Z,
; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %result = call float @llvm.log2.f32(float %in)
  store float %result, ptr addrspace(1) %out
  ret void
}

; FIXME: We should be able to merge these packets together on Cayman so we
; have a maximum of 4 instructions.
; Two-element vector case. The <2 x float> kernel argument is passed to
; llvm.log2.v2f32 and the result is stored through %out. The expected output
; contains one compare, scale, log and subtract sequence per element.
define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; SI-SDAG-LABEL: s_log2_v2f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x4f800000
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 1.0, v3, vcc
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v3, vcc
; SI-SDAG-NEXT:    v_mul_f32_e32 v4, s3, v4
; SI-SDAG-NEXT:    v_mul_f32_e32 v1, s2, v1
; SI-SDAG-NEXT:    v_log_f32_e32 v4, v4
; SI-SDAG-NEXT:    v_log_f32_e32 v3, v1
; SI-SDAG-NEXT:    s_mov_b32 s6, -1
; SI-SDAG-NEXT:    s_mov_b32 s4, s0
; SI-SDAG-NEXT:    s_mov_b32 s5, s1
; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v4, v2
; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v0
; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: s_log2_v2f32:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42000000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[0:1], s7, v0
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 1.0, v1, vcc
; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
; SI-GISEL-NEXT:    v_mul_f32_e32 v3, s6, v3
; SI-GISEL-NEXT:    v_mul_f32_e32 v0, s7, v0
; SI-GISEL-NEXT:    v_log_f32_e32 v3, v3
; SI-GISEL-NEXT:    v_log_f32_e32 v1, v0
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[0:1]
; SI-GISEL-NEXT:    v_sub_f32_e32 v0, v3, v0
; SI-GISEL-NEXT:    v_sub_f32_e32 v1, v1, v2
; SI-GISEL-NEXT:    s_mov_b32 s6, -1
; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: s_log2_v2f32:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v1, vcc
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 1.0, v2, vcc
; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
; VI-SDAG-NEXT:    v_mul_f32_e32 v4, s3, v4
; VI-SDAG-NEXT:    v_mul_f32_e32 v1, s2, v1
; VI-SDAG-NEXT:    v_log_f32_e32 v4, v4
; VI-SDAG-NEXT:    v_log_f32_e32 v2, v1
; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v4, v3
; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v2, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; VI-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: s_log2_v2f32:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42000000
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[0:1], s7, v0
; VI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 1.0, v1, vcc
; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
; VI-GISEL-NEXT:    v_mul_f32_e32 v3, s6, v3
; VI-GISEL-NEXT:    v_mul_f32_e32 v0, s7, v0
; VI-GISEL-NEXT:    v_log_f32_e32 v3, v3
; VI-GISEL-NEXT:    v_log_f32_e32 v1, v0
; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
; VI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[0:1]
; VI-GISEL-NEXT:    v_sub_f32_e32 v0, v3, v0
; VI-GISEL-NEXT:    v_sub_f32_e32 v1, v1, v2
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_endpgm
;
; GFX900-SDAG-LABEL: s_log2_v2f32:
; GFX900-SDAG:       ; %bb.0:
; GFX900-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v5, 0
; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v1, vcc
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v4, 1.0, v2, vcc
; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX900-SDAG-NEXT:    v_mul_f32_e32 v4, s3, v4
; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, s2, v1
; GFX900-SDAG-NEXT:    v_log_f32_e32 v4, v4
; GFX900-SDAG-NEXT:    v_log_f32_e32 v2, v1
; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v4, v3
; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v2, v0
; GFX900-SDAG-NEXT:    global_store_dwordx2 v5, v[0:1], s[0:1]
; GFX900-SDAG-NEXT:    s_endpgm
;
; GFX900-GISEL-LABEL: s_log2_v2f32:
; GFX900-GISEL:       ; %bb.0:
; GFX900-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42000000
; GFX900-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[0:1], s7, v0
; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v3, 1.0, v1, vcc
; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
; GFX900-GISEL-NEXT:    v_mul_f32_e32 v3, s6, v3
; GFX900-GISEL-NEXT:    v_mul_f32_e32 v0, s7, v0
; GFX900-GISEL-NEXT:    v_log_f32_e32 v3, v3
; GFX900-GISEL-NEXT:    v_log_f32_e32 v1, v0
; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[0:1]
; GFX900-GISEL-NEXT:    v_sub_f32_e32 v0, v3, v0
; GFX900-GISEL-NEXT:    v_sub_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX900-GISEL-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: s_log2_v2f32:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v4, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s4, 0x800000, s3
; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s5, 0x800000, s2
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s4
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s5
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x42000000, s4
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 0x42000000, s5
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, s3, v1
; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v3, s2, v3
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
; GFX1100-SDAG-NEXT:    v_log_f32_e32 v3, v3
; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v1, v0 :: v_dual_sub_f32 v0, v3, v2
; GFX1100-SDAG-NEXT:    global_store_b64 v4, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT:    s_nop 0
; GFX1100-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: s_log2_v2f32:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s4, 0x800000, s2
; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s5, 0x800000, s3
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 0x42000000, s4
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 0x42000000, s5
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT:    v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1
; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT:    v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT:    s_nop 0
; GFX1100-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT:    s_endpgm
;
; R600-LABEL: s_log2_v2f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 18, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     SETGT T0.W, literal.x, KC0[3].X,
; R600-NEXT:     SETGT * T1.W, literal.x, KC0[2].W,
; R600-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
; R600-NEXT:     CNDE * T2.W, PV.W, 1.0, literal.x,
; R600-NEXT:    1333788672(4.294967e+09), 0(0.000000e+00)
; R600-NEXT:     MUL_IEEE T2.W, KC0[3].X, PV.W,
; R600-NEXT:     CNDE * T3.W, T1.W, 1.0, literal.x,
; R600-NEXT:    1333788672(4.294967e+09), 0(0.000000e+00)
; R600-NEXT:     MUL_IEEE T0.Z, KC0[2].W, PS,
; R600-NEXT:     CNDE T0.W, T0.W, 0.0, literal.x,
; R600-NEXT:     LOG_IEEE * T0.X, PV.W,
; R600-NEXT:    1107296256(3.200000e+01), 0(0.000000e+00)
; R600-NEXT:     ADD T0.Y, PS, -PV.W,
; R600-NEXT:     CNDE T0.W, T1.W, 0.0, literal.x,
; R600-NEXT:     LOG_IEEE * T0.X, PV.Z,
; R600-NEXT:    1107296256(3.200000e+01), 0(0.000000e+00)
; R600-NEXT:     ADD T0.X, PS, -PV.W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log2_v2f32:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 23, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:     SETGT * T0.W, literal.x, KC0[3].X,
; CM-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
; CM-NEXT:     CNDE T0.Z, PV.W, 1.0, literal.x,
; CM-NEXT:     SETGT * T1.W, literal.y, KC0[2].W,
; CM-NEXT:    1333788672(4.294967e+09), 8388608(1.175494e-38)
; CM-NEXT:     CNDE T0.Y, PV.W, 1.0, literal.x,
; CM-NEXT:     CNDE T1.Z, T0.W, 0.0, literal.y,
; CM-NEXT:     MUL_IEEE * T0.W, KC0[3].X, PV.Z,
; CM-NEXT:    1333788672(4.294967e+09), 1107296256(3.200000e+01)
; CM-NEXT:     LOG_IEEE T0.X, T0.W,
; CM-NEXT:     LOG_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE * T0.W (MASKED), T0.W,
; CM-NEXT:     ADD T1.Y, PV.X, -T1.Z,
; CM-NEXT:     CNDE T0.Z, T1.W, 0.0, literal.x,
; CM-NEXT:     MUL_IEEE * T0.W, KC0[2].W, T0.Y,
; CM-NEXT:    1107296256(3.200000e+01), 0(0.000000e+00)
; CM-NEXT:     LOG_IEEE T0.X, T0.W,
; CM-NEXT:     LOG_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE * T0.W (MASKED), T0.W,
; CM-NEXT:     ADD * T1.X, PV.X, -T0.Z,
; CM-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %result = call <2 x float> @llvm.log2.v2f32(<2 x float> %in)
  store <2 x float> %result, ptr addrspace(1) %out
  ret void
}

; Three-element vector case. The <3 x float> kernel argument is passed to
; llvm.log2.v3f32 and the result is stored through %out. The expected tahiti
; output writes the result as a dwordx2 store plus a dword store at offset 8,
; while the tonga, gfx900 and gfx1100 output uses a single three-dword store.
define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; SI-SDAG-LABEL: s_log2_v3f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x4f800000
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s5, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 1.0, v3, vcc
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v6, 1.0, v3, vcc
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 1.0, v3, vcc
; SI-SDAG-NEXT:    v_mul_f32_e32 v4, s5, v4
; SI-SDAG-NEXT:    v_mul_f32_e32 v0, s6, v0
; SI-SDAG-NEXT:    v_log_f32_e32 v4, v4
; SI-SDAG-NEXT:    v_mul_f32_e32 v6, s4, v6
; SI-SDAG-NEXT:    v_log_f32_e32 v3, v0
; SI-SDAG-NEXT:    v_log_f32_e32 v6, v6
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v1, vcc
; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s2, -1
; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v4, v2
; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v3, v7
; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v6, v5
; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:8
; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: s_log2_v3f32:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
; SI-GISEL-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42000000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 1.0, v2, vcc
; SI-GISEL-NEXT:    v_mul_f32_e32 v0, s4, v0
; SI-GISEL-NEXT:    v_log_f32_e32 v0, v0
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s5, v1
; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[0:1], s6, v1
; SI-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v4
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 1.0, v2, vcc
; SI-GISEL-NEXT:    v_mul_f32_e32 v4, s5, v4
; SI-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, v2, s[0:1]
; SI-GISEL-NEXT:    v_log_f32_e32 v4, v4
; SI-GISEL-NEXT:    v_mul_f32_e32 v1, s6, v1
; SI-GISEL-NEXT:    v_log_f32_e32 v2, v1
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
; SI-GISEL-NEXT:    v_sub_f32_e32 v1, v4, v1
; SI-GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[0:1]
; SI-GISEL-NEXT:    s_mov_b32 s10, -1
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    v_sub_f32_e32 v2, v2, v3
; SI-GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
; SI-GISEL-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:8
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: s_log2_v3f32:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x4f800000
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 1.0, v3, vcc
; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s5, v0
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v6, 1.0, v3, vcc
; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v3, vcc
; VI-SDAG-NEXT:    v_mul_f32_e32 v4, s6, v4
; VI-SDAG-NEXT:    v_mul_f32_e32 v1, s4, v1
; VI-SDAG-NEXT:    v_log_f32_e32 v4, v4
; VI-SDAG-NEXT:    v_mul_f32_e32 v6, s5, v6
; VI-SDAG-NEXT:    v_log_f32_e32 v3, v1
; VI-SDAG-NEXT:    v_log_f32_e32 v6, v6
; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v4, v2
; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s1
; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v6, v5
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s0
; VI-SDAG-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: s_log2_v3f32:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42000000
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 1.0, v2, vcc
; VI-GISEL-NEXT:    v_mul_f32_e32 v0, s4, v0
; VI-GISEL-NEXT:    v_log_f32_e32 v0, v0
; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s5, v1
; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 s[0:1], s6, v1
; VI-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v4
; VI-GISEL-NEXT:    v_cndmask_b32_e32 v4, 1.0, v2, vcc
; VI-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, v2, s[0:1]
; VI-GISEL-NEXT:    v_mul_f32_e32 v4, s5, v4
; VI-GISEL-NEXT:    v_mul_f32_e32 v1, s6, v1
; VI-GISEL-NEXT:    v_log_f32_e32 v4, v4
; VI-GISEL-NEXT:    v_log_f32_e32 v2, v1
; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
; VI-GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[0:1]
; VI-GISEL-NEXT:    v_sub_f32_e32 v1, v4, v1
; VI-GISEL-NEXT:    v_sub_f32_e32 v2, v2, v3
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s3
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s2
; VI-GISEL-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
; VI-GISEL-NEXT:    s_endpgm
;
; GFX900-SDAG-LABEL: s_log2_v3f32:
; GFX900-SDAG:       ; %bb.0:
; GFX900-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, 0x4f800000
; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v4, 1.0, v3, vcc
; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s5, v0
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v6, 1.0, v3, vcc
; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v3, vcc
; GFX900-SDAG-NEXT:    v_mul_f32_e32 v4, s6, v4
; GFX900-SDAG-NEXT:    v_mul_f32_e32 v6, s5, v6
; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, s4, v1
; GFX900-SDAG-NEXT:    v_log_f32_e32 v4, v4
; GFX900-SDAG-NEXT:    v_log_f32_e32 v6, v6
; GFX900-SDAG-NEXT:    v_log_f32_e32 v3, v1
; GFX900-SDAG-NEXT:    v_mov_b32_e32 v7, 0
; GFX900-SDAG-NEXT:    v_sub_f32_e32 v2, v4, v2
; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v6, v5
; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v0
; GFX900-SDAG-NEXT:    global_store_dwordx3 v7, v[0:2], s[2:3]
; GFX900-SDAG-NEXT:    s_endpgm
;
; GFX900-GISEL-LABEL: s_log2_v3f32:
; GFX900-GISEL:       ; %bb.0:
; GFX900-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX900-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x42000000
; GFX900-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 1.0, v2, vcc
; GFX900-GISEL-NEXT:    v_mul_f32_e32 v0, s4, v0
; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s5, v1
; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 s[0:1], s6, v1
; GFX900-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v4
; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v4, 1.0, v2, vcc
; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, v2, s[0:1]
; GFX900-GISEL-NEXT:    v_mul_f32_e32 v4, s5, v4
; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, s6, v1
; GFX900-GISEL-NEXT:    v_log_f32_e32 v4, v4
; GFX900-GISEL-NEXT:    v_log_f32_e32 v2, v1
; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
; GFX900-GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[0:1]
; GFX900-GISEL-NEXT:    v_sub_f32_e32 v1, v4, v1
; GFX900-GISEL-NEXT:    v_sub_f32_e32 v2, v2, v3
; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-GISEL-NEXT:    global_store_dwordx3 v3, v[0:2], s[2:3]
; GFX900-GISEL-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: s_log2_v3f32:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b128 s[4:7], s[0:1], 0x34
; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s2, 0x800000, s6
; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s3, 0x800000, s5
; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s4
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s2
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s3
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s7
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x42000000, s2
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s3
; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v2, s6, v2
; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v4, s5, v4 :: v_dual_mul_f32 v5, s4, v5
; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 0x42000000, s7
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_log_f32_e32 v2, v2
; GFX1100-SDAG-NEXT:    v_log_f32_e32 v4, v4
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_log_f32_e32 v5, v5
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v6, 0
; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v2, v2, v0 :: v_dual_sub_f32 v1, v4, v1
; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v5, v3
; GFX1100-SDAG-NEXT:    global_store_b96 v6, v[0:2], s[0:1]
; GFX1100-SDAG-NEXT:    s_nop 0
; GFX1100-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: s_log2_v3f32:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b128 s[4:7], s[0:1], 0x34
; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s2, 0x800000, s4
; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s3, 0x800000, s5
; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s6
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x42000000, s3
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 0x42000000, s2
; GFX1100-GISEL-NEXT:    v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1
; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 0x42000000, s7
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT:    v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v2, s6, v2
; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT:    v_dual_sub_f32 v0, v0, v3 :: v_dual_mov_b32 v3, 0
; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v1, v1, v4
; GFX1100-GISEL-NEXT:    v_log_f32_e32 v2, v2
; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v2, v2, v5
; GFX1100-GISEL-NEXT:    global_store_b96 v3, v[0:2], s[0:1]
; GFX1100-GISEL-NEXT:    s_nop 0
; GFX1100-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-GISEL-NEXT:    s_endpgm
;
; R600-LABEL: s_log2_v3f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 29, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     SETGT T0.W, literal.x, KC0[3].Z,
; R600-NEXT:     SETGT * T1.W, literal.x, KC0[3].Y,
; R600-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
; R600-NEXT:     CNDE * T2.W, PV.W, 1.0, literal.x,
; R600-NEXT:    1333788672(4.294967e+09), 0(0.000000e+00)
; R600-NEXT:     MUL_IEEE T2.W, KC0[3].Z, PV.W,
; R600-NEXT:     CNDE * T3.W, T1.W, 1.0, literal.x,
; R600-NEXT:    1333788672(4.294967e+09), 0(0.000000e+00)
; R600-NEXT:     MUL_IEEE T0.Y, KC0[3].Y, PS,
; R600-NEXT:     SETGT T0.Z, literal.x, KC0[3].W,
; R600-NEXT:     CNDE T0.W, T0.W, 0.0, literal.y,
; R600-NEXT:     LOG_IEEE * T0.X, PV.W,
; R600-NEXT:    8388608(1.175494e-38), 1107296256(3.200000e+01)
; R600-NEXT:     ADD T1.Y, PS, -PV.W,
; R600-NEXT:     CNDE T1.Z, PV.Z, 1.0, literal.x,
; R600-NEXT:     CNDE T0.W, T1.W, 0.0, literal.y,
; R600-NEXT:     LOG_IEEE * T0.X, PV.Y,
; R600-NEXT:    1333788672(4.294967e+09), 1107296256(3.200000e+01)
; R600-NEXT:     ADD T1.X, PS, -PV.W,
; R600-NEXT:     MUL_IEEE T0.W, KC0[3].W, PV.Z,
; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; R600-NEXT:     CNDE T1.W, T0.Z, 0.0, literal.x,
; R600-NEXT:     LOG_IEEE * T0.Y, PV.W,
; R600-NEXT:    1107296256(3.200000e+01), 0(0.000000e+00)
; R600-NEXT:     ADD T2.X, PS, -PV.W,
; R600-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
; R600-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; R600-NEXT:     LSHR * T3.X, PV.W, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log2_v3f32:
; CM:       ; %bb.0:
; CM-NEXT:    ALU 35, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T2.X
; CM-NEXT:    CF_END
; CM-NEXT:    ALU clause starting at 4:
; CM-NEXT:     SETGT * T0.W, literal.x, KC0[3].W,
; CM-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
; CM-NEXT:     CNDE T0.Y, PV.W, 1.0, literal.x,
; CM-NEXT:     SETGT T0.Z, literal.y, KC0[3].Z,
; CM-NEXT:     SETGT * T1.W, literal.y, KC0[3].Y,
; CM-NEXT:    1333788672(4.294967e+09), 8388608(1.175494e-38)
; CM-NEXT:     CNDE T0.X, PV.W, 1.0, literal.x,
; CM-NEXT:     CNDE T1.Y, PV.Z, 1.0, literal.x,
; CM-NEXT:     CNDE T1.Z, T0.W, 0.0, literal.y,
; CM-NEXT:     MUL_IEEE * T0.W, KC0[3].W, PV.Y,
; CM-NEXT:    1333788672(4.294967e+09), 1107296256(3.200000e+01)
; CM-NEXT:     LOG_IEEE T0.X (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE T0.Y, T0.W,
; CM-NEXT:     LOG_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE * T0.W (MASKED), T0.W,
; CM-NEXT:     ADD T1.X, PV.Y, -T1.Z,
; CM-NEXT:     CNDE T0.Y, T0.Z, 0.0, literal.x,
; CM-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
; CM-NEXT:     MUL_IEEE * T0.W, KC0[3].Z, T1.Y,
; CM-NEXT:    1107296256(3.200000e+01), 8(1.121039e-44)
; CM-NEXT:     LOG_IEEE T0.X (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE * T0.W, T0.W,
; CM-NEXT:     LSHR T2.X, T0.Z, literal.x,
; CM-NEXT:     ADD T0.Y, PV.W, -T0.Y,
; CM-NEXT:     CNDE T0.Z, T1.W, 0.0, literal.y,
; CM-NEXT:     MUL_IEEE * T0.W, KC0[3].Y, T0.X,
; CM-NEXT:    2(2.802597e-45), 1107296256(3.200000e+01)
; CM-NEXT:     LOG_IEEE T0.X, T0.W,
; CM-NEXT:     LOG_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT:     LOG_IEEE * T0.W (MASKED), T0.W,
; CM-NEXT:     ADD * T0.X, PV.X, -T0.Z,
; CM-NEXT:     LSHR * T3.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %result = call <3 x float> @llvm.log2.v3f32(<3 x float> %in)
  store <3 x float> %result, ptr addrspace(1) %out
  ret void
}

; FIXME: We should be able to merge these packets together on Cayman so we
; have a maximum of 4 instructions.
define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-SDAG-LABEL: s_log2_v4f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x4f800000
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 1.0, v3, vcc
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v6, 1.0, v3, vcc
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v1, vcc
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v8, 1.0, v3, vcc
; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v3, vcc
; SI-SDAG-NEXT:    v_mul_f32_e32 v4, s3, v4
; SI-SDAG-NEXT:    v_mul_f32_e32 v6, s2, v6
; SI-SDAG-NEXT:    v_mul_f32_e32 v8, s1, v8
; SI-SDAG-NEXT:    v_mul_f32_e32 v1, s0, v1
; SI-SDAG-NEXT:    v_log_f32_e32 v4, v4
; SI-SDAG-NEXT:    v_log_f32_e32 v6, v6
; SI-SDAG-NEXT:    v_log_f32_e32 v8, v8
; SI-SDAG-NEXT:    v_log_f32_e32 v9, v1
; SI-SDAG-NEXT:    s_mov_b32 s6, -1
; SI-SDAG-NEXT:    v_sub_f32_e32 v3, v4, v2
; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v6, v5
; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v8, v7
;
SI-SDAG-NEXT: v_sub_f32_e32 v0, v9, v0 ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; ; SI-GISEL-LABEL: s_log2_v4f32: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd ; SI-GISEL-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2 ; SI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2 ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] ; SI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5 ; SI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 ; SI-GISEL-NEXT: v_log_f32_e32 v5, v5 ; SI-GISEL-NEXT: v_log_f32_e32 v3, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2 ; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 ; SI-GISEL-NEXT: s_mov_b32 s10, -1 ; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; SI-GISEL-NEXT: s_endpgm ; ; VI-SDAG-LABEL: s_log2_v4f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, 
0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc ; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc ; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v3, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v1, vcc ; VI-SDAG-NEXT: v_cndmask_b32_e32 v8, 1.0, v3, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, s7, v4 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc ; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 ; VI-SDAG-NEXT: v_log_f32_e32 v6, v6 ; VI-SDAG-NEXT: v_mul_f32_e32 v8, s5, v8 ; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v8, v8 ; VI-SDAG-NEXT: v_log_f32_e32 v9, v1 ; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v2 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1 ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v8, v7 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v9, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-SDAG-NEXT: s_endpgm ; ; VI-GISEL-LABEL: s_log2_v4f32: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc 
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2 ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] ; VI-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 ; VI-GISEL-NEXT: v_log_f32_e32 v5, v5 ; VI-GISEL-NEXT: v_log_f32_e32 v3, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2 ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-GISEL-NEXT: s_endpgm ; ; GFX900-SDAG-LABEL: s_log2_v4f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v3, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v1, vcc ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc ; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, s7, v5 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s6, v7 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v9, s5, v9 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 
s4, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v5 ; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v7 ; GFX900-SDAG-NEXT: v_log_f32_e32 v9, v9 ; GFX900-SDAG-NEXT: v_log_f32_e32 v10, v1 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v5, v2 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v7, v6 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v9, v8 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v10, v0 ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log2_v4f32: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s5, v2 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s5, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v2 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v2 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] ; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s6, v5 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s7, v2 ; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5 ; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2 ; 
GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX900-GISEL-NEXT: s_endpgm ; ; GFX1100-SDAG-LABEL: s_log2_v4f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_clause 0x1 ; GFX1100-SDAG-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s7 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s6 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s5 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s4 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 1.0, 0x4f800000, s8 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v7, 1.0, 0x4f800000, s9 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s7, v2 :: v_dual_mul_f32 v3, s6, v3 ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v6, s5, v6 :: v_dual_mul_f32 v7, s4, v7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v8, v3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(TRANS32_DEP_3) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v6, v6 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v7, v7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s8 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s9 ; GFX1100-SDAG-NEXT: v_mov_b32_e32 v9, 0 ; GFX1100-SDAG-NEXT: v_dual_sub_f32 
v3, v2, v0 :: v_dual_sub_f32 v2, v8, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v6, v4 :: v_dual_sub_f32 v0, v7, v5 ; GFX1100-SDAG-NEXT: global_store_b128 v9, v[0:3], s[0:1] ; GFX1100-SDAG-NEXT: s_nop 0 ; GFX1100-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-SDAG-NEXT: s_endpgm ; ; GFX1100-GISEL-LABEL: s_log2_v4f32: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_clause 0x1 ; GFX1100-GISEL-NEXT: s_load_b128 s[4:7], s[0:1], 0x34 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s4 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s5 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s6 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s7 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s3 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s4, v0 :: v_dual_mul_f32 v1, s5, v1 ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s6, v2 :: v_dual_mul_f32 v3, s7, v3 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s3 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(TRANS32_DEP_3) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 ; 
GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9 ; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_dual_sub_f32 v2, v2, v6 :: v_dual_sub_f32 v3, v3, v7 ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX1100-GISEL-NEXT: s_nop 0 ; GFX1100-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log2_v4f32: ; R600: ; %bb.0: ; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[] ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 ; R600-NEXT: CF_END ; R600-NEXT: PAD ; R600-NEXT: ALU clause starting at 4: ; R600-NEXT: SETGT T0.W, literal.x, KC0[4].X, ; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].W, ; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x, ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) ; R600-NEXT: MUL_IEEE T0.Z, KC0[4].X, PV.W, ; R600-NEXT: SETGT T2.W, literal.x, KC0[3].Z, ; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.y, ; R600-NEXT: 8388608(1.175494e-38), 1333788672(4.294967e+09) ; R600-NEXT: MUL_IEEE T0.X, KC0[3].W, PS, ; R600-NEXT: CNDE T0.Y, T0.W, 0.0, literal.x, ; R600-NEXT: SETGT T1.Z, literal.y, KC0[3].Y, ; R600-NEXT: CNDE T0.W, PV.W, 1.0, literal.z, ; R600-NEXT: LOG_IEEE * T0.Z, PV.Z, ; R600-NEXT: 1107296256(3.200000e+01), 8388608(1.175494e-38) ; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) ; R600-NEXT: MUL_IEEE T1.X, KC0[3].Z, PV.W, ; R600-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x, ; R600-NEXT: CNDE T2.Z, PV.Z, 1.0, literal.y, ; R600-NEXT: ADD T0.W, PS, -PV.Y, ; R600-NEXT: LOG_IEEE * T0.X, PV.X, ; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09) ; R600-NEXT: MUL_IEEE T2.Y, KC0[3].Y, PV.Z, ; R600-NEXT: ADD T0.Z, PS, -PV.Y, ; R600-NEXT: CNDE T1.W, T2.W, 0.0, literal.x, ; R600-NEXT: LOG_IEEE * T0.X, PV.X, ; R600-NEXT: 
1107296256(3.200000e+01), 0(0.000000e+00) ; R600-NEXT: ADD T0.Y, PS, -PV.W, ; R600-NEXT: CNDE T1.W, T1.Z, 0.0, literal.x, ; R600-NEXT: LOG_IEEE * T0.X, PV.Y, ; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) ; R600-NEXT: ADD T0.X, PS, -PV.W, ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; ; CM-LABEL: s_log2_v4f32: ; CM: ; %bb.0: ; CM-NEXT: ALU 43, @4, KC0[CB0:0-32], KC1[] ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X ; CM-NEXT: CF_END ; CM-NEXT: PAD ; CM-NEXT: ALU clause starting at 4: ; CM-NEXT: SETGT * T0.W, literal.x, KC0[4].X, ; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) ; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, ; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W, ; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Z, ; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) ; CM-NEXT: CNDE T0.X, PV.W, 1.0, literal.x, ; CM-NEXT: CNDE T1.Y, T0.W, 0.0, literal.y, ; CM-NEXT: CNDE T1.Z, PV.Z, 1.0, literal.x, ; CM-NEXT: MUL_IEEE * T0.W, KC0[4].X, PV.Y, ; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) ; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, ; CM-NEXT: LOG_IEEE T0.Y, T0.W, ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, ; CM-NEXT: CNDE T1.X, T0.Z, 0.0, literal.x, ; CM-NEXT: SETGT T2.Y, literal.y, KC0[3].Y, ; CM-NEXT: MUL_IEEE T0.Z, KC0[3].W, T1.Z, ; CM-NEXT: ADD * T0.W, PV.Y, -T1.Y, ; CM-NEXT: 1107296256(3.200000e+01), 8388608(1.175494e-38) ; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.Z, ; CM-NEXT: LOG_IEEE T0.Y, T0.Z, ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.Z, ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.Z, ; CM-NEXT: CNDE T2.X, T2.Y, 1.0, literal.x, ; CM-NEXT: CNDE T1.Y, T1.W, 0.0, literal.y, ; CM-NEXT: ADD T0.Z, PV.Y, -T1.X, ; CM-NEXT: MUL_IEEE * T1.W, KC0[3].Z, T0.X, BS:VEC_021/SCL_122 ; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) ; CM-NEXT: LOG_IEEE T0.X, T1.W, ; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, ; CM-NEXT: LOG_IEEE * 
T0.W (MASKED), T1.W, ; CM-NEXT: ADD T0.Y, PV.X, -T1.Y, ; CM-NEXT: CNDE T1.Z, T2.Y, 0.0, literal.x, ; CM-NEXT: MUL_IEEE * T1.W, KC0[3].Y, T2.X, ; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) ; CM-NEXT: LOG_IEEE T0.X, T1.W, ; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, ; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, ; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, ; CM-NEXT: ADD * T0.X, PV.X, -T1.Z, ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %result = call <4 x float> @llvm.log2.v4f32(<4 x float> %in) store <4 x float> %result, ptr addrspace(1) %out ret void } define float @v_log2_f32(float %in) { ; GFX689-SDAG-LABEL: v_log2_f32: ; GFX689-SDAG: ; %bb.0: ; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc ; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32: ; GFX689-GISEL: ; %bb.0: ; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc ; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 
0x800000, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_f32: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call float @llvm.log2.f32(float %in) ret float %result } define float @v_log2_fabs_f32(float %in) { ; GFX689-SDAG-LABEL: v_log2_fabs_f32: ; GFX689-SDAG: ; %bb.0: ; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc ; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 ; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_fabs_f32: 
; GFX689-GISEL: ; %bb.0: ; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc ; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_fabs_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 ; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_fabs_f32: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_fabs_f32: ; R600: ; %bb.0: ; 
R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_fabs_f32: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %fabs = call float @llvm.fabs.f32(float %in) %result = call float @llvm.log2.f32(float %fabs) ret float %result } define float @v_log2_fneg_fabs_f32(float %in) { ; GFX689-SDAG-LABEL: v_log2_fneg_fabs_f32: ; GFX689-SDAG: ; %bb.0: ; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000 ; GFX689-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc ; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2 ; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_fneg_fabs_f32: ; GFX689-GISEL: ; %bb.0: ; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc ; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 ; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 ; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2 ; GFX1100-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_fneg_fabs_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_fneg_fabs_f32:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_fneg_fabs_f32:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %fabs = call float @llvm.fabs.f32(float %in)
  %fneg.fabs = fneg float %fabs
  %result = call float @llvm.log2.f32(float %fneg.fabs)
  ret float %result
}

; log2 of a negated input. The expected code rescales small inputs around
; v_log_f32 - when -x is below 0x800000 (the smallest normal f32) it is
; multiplied by 0x4f800000 (2^32) and 0x42000000 (32.0) is subtracted from the
; result. SelectionDAG folds the negation into the compare constant
; (0x80800000), GlobalISel keeps a -v0 source modifier on the compare. Both
; apply the negation as a source modifier on the multiply.
define float @v_log2_fneg_f32(float %in) {
; GFX689-SDAG-LABEL: v_log2_fneg_f32:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000
; GFX689-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_fneg_f32:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_fneg_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_fneg_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_fneg_f32:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_fneg_f32:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %fneg = fneg float %in
  %result = call float @llvm.log2.f32(float %fneg)
  ret float %result
}

; The call carries the 'fast' flag set. The checks still expect the full
; compare / scale by 2^32 / v_log_f32 / subtract 32.0 sequence.
define float @v_log2_f32_fast(float %in) {
; GFX689-SDAG-LABEL: v_log2_f32_fast:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_fast:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_fast:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_fast:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_fast:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_fast:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call fast float @llvm.log2.f32(float %in)
  ret float %result
}

; Function-level "unsafe-fp-math"="true" with no fast-math flags on the call.
; The checks still expect the full compare / scale / v_log_f32 / subtract
; sequence.
define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; GFX689-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_unsafe_math_attr:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_unsafe_math_attr:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call float @llvm.log2.f32(float %in)
  ret float %result
}

; Function-level "approx-func-fp-math"="true" with no fast-math flags on the
; call. The checks still expect the full compare / scale / v_log_f32 /
; subtract sequence.
define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
; GFX689-SDAG-LABEL: v_log2_f32_approx_fn_attr:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_approx_fn_attr:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_approx_fn_attr:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_approx_fn_attr:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_approx_fn_attr:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call float @llvm.log2.f32(float %in)
  ret float %result
}

; The call carries only the 'ninf' flag. The checks expect the full compare /
; scale / v_log_f32 / subtract sequence.
define float @v_log2_f32_ninf(float %in) {
; GFX689-SDAG-LABEL: v_log2_f32_ninf:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_ninf:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_ninf:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_ninf:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_ninf:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call ninf float @llvm.log2.f32(float %in)
  ret float %result
}

; The call carries only the 'afn' flag. The checks still expect the full
; compare / scale / v_log_f32 / subtract sequence, i.e. 'afn' alone does not
; remove the small-input scaling here.
define float @v_log2_f32_afn(float %in) {
; GFX689-SDAG-LABEL: v_log2_f32_afn:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_afn:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_afn:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_afn:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_afn:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_afn:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call afn float @llvm.log2.f32(float %in)
  ret float %result
}

; 'afn' call in a function using attribute group #0. The checks expect a bare
; v_log_f32 with no input scaling.
; NOTE(review) - #0 is defined outside this chunk. Presumably it selects a
; denormal-flushing f32 mode, as the _daz suffix suggests. Confirm against the
; attribute list at the end of the file.
define float @v_log2_f32_afn_daz(float %in) #0 {
; GFX689-LABEL: v_log2_f32_afn_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-NEXT: v_log_f32_e32 v0, v0
; GFX689-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_afn_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_afn_daz:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_afn_daz:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call afn float @llvm.log2.f32(float %in)
  ret float %result
}

; 'afn' call in a function using attribute group #1. The checks expect the full
; compare / scale / v_log_f32 / subtract sequence.
; NOTE(review) - #1 is defined outside this chunk. Presumably it selects a
; dynamic denormal mode, as the _dynamic suffix suggests. Confirm.
define float @v_log2_f32_afn_dynamic(float %in) #1 {
; GFX689-SDAG-LABEL: v_log2_f32_afn_dynamic:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_afn_dynamic:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_afn_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_afn_dynamic:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_afn_dynamic:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call afn float @llvm.log2.f32(float %in)
  ret float %result
}

; 'afn' log2 of fabs(x). The checks expect the fabs to be applied as a |v0|
; source modifier on both the compare and the multiply, with the full
; compare / scale / v_log_f32 / subtract sequence retained.
define float @v_fabs_log2_f32_afn(float %in) {
; GFX689-SDAG-LABEL: v_fabs_log2_f32_afn:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_fabs_log2_f32_afn:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_fabs_log2_f32_afn:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fabs_log2_f32_afn:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_fabs_log2_f32_afn:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %fabs = call float @llvm.fabs.f32(float %in)
  %result = call afn float @llvm.log2.f32(float %fabs)
  ret float %result
}

; Unflagged call in a function using attribute group #0. The checks expect a
; bare v_log_f32 with no input scaling.
; NOTE(review) - #0 is defined outside this chunk. Presumably a
; denormal-flushing f32 mode. Confirm.
define float @v_log2_f32_daz(float %in) #0 {
; GFX689-LABEL: v_log2_f32_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-NEXT: v_log_f32_e32 v0, v0
; GFX689-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_daz:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_daz:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call float @llvm.log2.f32(float %in)
  ret float %result
}

; The call carries only the 'nnan' flag. The checks expect the full compare /
; scale / v_log_f32 / subtract sequence.
define float @v_log2_f32_nnan(float %in) {
; GFX689-SDAG-LABEL: v_log2_f32_nnan:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_nnan:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_nnan:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_nnan:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_nnan:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_nnan:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call nnan float @llvm.log2.f32(float %in)
  ret float %result
}

; 'nnan' call in a function using attribute group #0. The checks expect a bare
; v_log_f32 with no input scaling.
; NOTE(review) - #0 is defined outside this chunk. Presumably a
; denormal-flushing f32 mode. Confirm.
define float @v_log2_f32_nnan_daz(float %in) #0 {
; GFX689-LABEL: v_log2_f32_nnan_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-NEXT: v_log_f32_e32 v0, v0
; GFX689-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_nnan_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_nnan_daz:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_nnan_daz:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call nnan float @llvm.log2.f32(float %in)
  ret float %result
}

; 'nnan' call in a function using attribute group #1. The checks expect the
; full compare / scale / v_log_f32 / subtract sequence.
; NOTE(review) - #1 is defined outside this chunk. Presumably a dynamic
; denormal mode. Confirm.
define float @v_log2_f32_nnan_dynamic(float %in) #1 {
; GFX689-SDAG-LABEL: v_log2_f32_nnan_dynamic:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_nnan_dynamic:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_nnan_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_nnan_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_nnan_dynamic:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_nnan_dynamic:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call nnan float @llvm.log2.f32(float %in)
  ret float %result
}

; 'ninf' call in a function using attribute group #0. The checks expect a bare
; v_log_f32 with no input scaling.
; NOTE(review) - #0 is defined outside this chunk. Presumably a
; denormal-flushing f32 mode. Confirm.
define float @v_log2_f32_ninf_daz(float %in) #0 {
; GFX689-LABEL: v_log2_f32_ninf_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-NEXT: v_log_f32_e32 v0, v0
; GFX689-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: v_log2_f32_ninf_daz:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-NEXT: v_log_f32_e32 v0, v0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_ninf_daz:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_ninf_daz:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call ninf float @llvm.log2.f32(float %in)
  ret float %result
}

; 'ninf' call in a function using attribute group #1. The checks expect the
; full compare / scale / v_log_f32 / subtract sequence.
; NOTE(review) - #1 is defined outside this chunk. Presumably a dynamic
; denormal mode. Confirm.
define float @v_log2_f32_ninf_dynamic(float %in) #1 {
; GFX689-SDAG-LABEL: v_log2_f32_ninf_dynamic:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_ninf_dynamic:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log2_f32_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log2_f32_ninf_dynamic:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_ninf_dynamic:
; R600: ; %bb.0:
; R600-NEXT: CF_END
; R600-NEXT: PAD
;
; CM-LABEL: v_log2_f32_ninf_dynamic:
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
  %result = call ninf float @llvm.log2.f32(float %in)
  ret float %result
}

define float @v_log2_f32_nnan_ninf(float %in) {
; GFX689-SDAG-LABEL: v_log2_f32_nnan_ninf:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX689-GISEL-LABEL: v_log2_f32_nnan_ninf:
; GFX689-GISEL: ; %bb.0:
; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_f32_nnan_ninf: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_nnan_ninf: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_nnan_ninf: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call nnan ninf float @llvm.log2.f32(float %in) ret float %result } define float @v_log2_f32_nnan_ninf_daz(float %in) #0 { ; GFX689-LABEL: v_log2_f32_nnan_ninf_daz: ; GFX689: ; %bb.0: ; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-NEXT: v_log_f32_e32 v0, v0 ; GFX689-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_f32_nnan_ninf_daz: ; GFX1100: ; %bb.0: ; 
GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_nnan_ninf_daz: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_nnan_ninf_daz: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call nnan ninf float @llvm.log2.f32(float %in) ret float %result } define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX689-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: ; GFX689-SDAG: ; %bb.0: ; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc ; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: ; GFX689-GISEL: ; %bb.0: ; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc ; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 
0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_nnan_ninf_dynamic: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_nnan_ninf_dynamic: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call nnan ninf float @llvm.log2.f32(float %in) ret float %result } ; log2 with the fast flag under attribute group #0 (defined elsewhere in this file). All amdgcn configurations expect a bare v_log_f32. define float @v_log2_f32_fast_daz(float %in) #0 { ; GFX689-LABEL: v_log2_f32_fast_daz: ; GFX689: ; %bb.0: ; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-NEXT: v_log_f32_e32 v0, v0 ; GFX689-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_f32_fast_daz: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_fast_daz: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_fast_daz: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call fast float @llvm.log2.f32(float %in) ret float %result } ; log2 without fast-math flags under attribute group #1 (defined elsewhere in this file). The expected amdgcn code scales inputs below 0x800000 by 0x4f800000 (2^32) before v_log_f32 and subtracts 0x42000000 (32.0) from the result. define float @v_log2_f32_dynamic_mode(float %in) #1 { ; 
GFX689-SDAG-LABEL: v_log2_f32_dynamic_mode: ; GFX689-SDAG: ; %bb.0: ; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc ; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_dynamic_mode: ; GFX689-GISEL: ; %bb.0: ; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc ; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_f32_dynamic_mode: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_f32_dynamic_mode: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_dynamic_mode: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_dynamic_mode: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call float @llvm.log2.f32(float %in) ret float %result } ; log2 of an undef operand. The SelectionDAG expectations are a single v_log_f32 reading an otherwise unset SGPR (s4 or s0), while the GlobalISel expectations still contain the denormal-scaling sequence. define float @v_log2_f32_undef() { ; GFX689-SDAG-LABEL: v_log2_f32_undef: ; GFX689-SDAG: ; %bb.0: ; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-SDAG-NEXT: v_log_f32_e32 v0, s4 ; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_undef: ; GFX689-GISEL: ; %bb.0: ; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX689-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 ; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_f32_undef: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_f32_undef: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 
0x4f800000, s0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, s0, v0, vcc_lo ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_undef: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_undef: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call float @llvm.log2.f32(float undef) ret float %result } ; log2 of the constant 0.0. GlobalISel is expected to fold the call to the constant 0xff800000 (-inf); SelectionDAG is expected to emit v_log_f32 of 0 followed by an add of 0xc2000000 (-32.0). define float @v_log2_f32_0() { ; GFX689-SDAG-LABEL: v_log2_f32_0: ; GFX689-SDAG: ; %bb.0: ; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-SDAG-NEXT: v_log_f32_e32 v0, 0 ; GFX689-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0 ; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_0: ; GFX689-GISEL: ; %bb.0: ; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0xff800000 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_f32_0: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_f32_0: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0xff800000 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_0: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_0: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call float @llvm.log2.f32(float 0.0) ret 
float %result } define float @v_log2_f32_from_fpext_f16(i16 %src.i) { ; GFX689-LABEL: v_log2_f32_from_fpext_f16: ; GFX689: ; %bb.0: ; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX689-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX689-NEXT: v_log_f32_e32 v0, v0 ; GFX689-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_f32_from_fpext_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_from_fpext_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_from_fpext_f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %src = bitcast i16 %src.i to half %fpext = fpext half %src to float %result = call float @llvm.log2.f32(float %fpext) ret float %result } define float @v_log2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-SDAG-LABEL: v_log2_f32_from_fpext_math_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc ; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_f32_from_fpext_math_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_log2_f32_from_fpext_math_f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_f16_e32 v0, v0, v1 ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; VI-NEXT: v_log_f32_e32 v0, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_log2_f32_from_fpext_math_f16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX900-NEXT: v_log_f32_e32 v0, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_f32_from_fpext_math_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_from_fpext_math_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_from_fpext_math_f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %src0 = bitcast i16 %src0.i to half %src1 = bitcast i16 %src1.i to half %fadd = fadd half %src0, %src1 %fpext = fpext half %fadd to float %result = call float @llvm.log2.f32(float %fpext) ret float %result } ; log2 of a bfloat argument extended to float. VI, GFX900 and GFX1100 expect a 16-bit left shift to form the f32 input (SI expects none), and all four amdgcn prefixes expect the denormal-scaling sequence around v_log_f32. define float @v_log2_f32_from_fpext_bf16(bfloat %src) { ; SI-LABEL: v_log2_f32_from_fpext_bf16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, 0x800000 ; SI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; SI-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc ; SI-NEXT: v_mul_f32_e32 v0, v0, v2 ; SI-NEXT: v_log_f32_e32 v0, v0 ; SI-NEXT: v_mov_b32_e32 v1, 0x42000000 ; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_log2_f32_from_fpext_bf16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; VI-NEXT: s_mov_b32 s4, 0x800000 ; VI-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; VI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc ; VI-NEXT: v_mul_f32_e32 v0, v0, v1 ; VI-NEXT: v_log_f32_e32 v0, v0 ; VI-NEXT: v_mov_b32_e32 v1, 0x42000000 ; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_log2_f32_from_fpext_bf16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc ; GFX900-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX900-NEXT: v_log_f32_e32 v0, v0 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_f32_from_fpext_bf16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1100-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo ; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f32_from_fpext_bf16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f32_from_fpext_bf16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %fpext = fpext bfloat %src to float %result = call float @llvm.log2.f32(float %fpext) ret float 
%result } define half @v_log2_f16(half %in) { ; SI-SDAG-LABEL: v_log2_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_log2_f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_log_f16_e32 v0, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_log2_f16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_log_f16_e32 v0, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call half @llvm.log2.f16(half %in) ret half %result } define half @v_log2_fabs_f16(half %in) { ; SI-SDAG-LABEL: v_log2_fabs_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_fabs_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: 
v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_log2_fabs_f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_log_f16_e64 v0, |v0| ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_log2_fabs_f16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_log_f16_e64 v0, |v0| ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_fabs_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_log_f16_e64 v0, |v0| ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_fabs_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_fabs_f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %fabs = call half @llvm.fabs.f16(half %in) %result = call half @llvm.log2.f16(half %fabs) ret half %result } ; f16 log2 of -|x|. VI, GFX900 and GFX1100 expect both modifiers folded into v_log_f16; SI expects them folded into the f16-to-f32 conversion that feeds v_log_f32. define half @v_log2_fneg_fabs_f16(half %in) { ; SI-SDAG-LABEL: v_log2_fneg_fabs_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0| ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_fneg_fabs_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_log2_fneg_fabs_f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_log_f16_e64 v0, -|v0| ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_log2_fneg_fabs_f16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_log_f16_e64 v0, -|v0| ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_fneg_fabs_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_log_f16_e64 v0, -|v0| ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_fneg_fabs_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_fneg_fabs_f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %fabs = call half @llvm.fabs.f16(half %in) %fneg.fabs = fneg half %fabs %result = call half @llvm.log2.f16(half %fneg.fabs) ret half %result } define half @v_log2_fneg_f16(half %in) { ; SI-SDAG-LABEL: v_log2_fneg_f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_fneg_f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_log2_fneg_f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_log_f16_e64 v0, -v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_log2_fneg_f16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_log_f16_e64 v0, -v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_fneg_f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_log_f16_e64 v0, -v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_fneg_f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_fneg_f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %fneg = fneg half %in %result = call half @llvm.log2.f16(half %fneg) ret half %result } define half @v_log2_f16_fast(half %in) { ; SI-SDAG-LABEL: v_log2_f16_fast: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_f16_fast: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_log2_f16_fast: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_log_f16_e32 v0, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_log2_f16_fast: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_log_f16_e32 v0, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_f16_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_f16_fast: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call fast half @llvm.log2.f16(half %in) ret half %result } define <2 x half> @v_log2_v2f16(<2 x half> %in) { ; SI-SDAG-LABEL: v_log2_v2f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_v2f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_v2f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log2_v2f16: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log2_v2f16: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log2_v2f16: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_v2f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 ; 
GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_v2f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call <2 x half> @llvm.log2.v2f16(<2 x half> %in) ret <2 x half> %result } ; <2 x half> log2 of |x|. The SelectionDAG expectations fold the abs into source modifiers, whereas the GlobalISel expectations first clear both sign bits with an and of 0x7fff7fff. define <2 x half> @v_log2_fabs_v2f16(<2 x half> %in) { ; SI-SDAG-LABEL: v_log2_fabs_v2f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_fabs_v2f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_fabs_v2f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_log_f16_sdwa v1, |v0| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_e64 v0, |v0| ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log2_fabs_v2f16: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_and_b32_e32 v0, 
0x7fff7fff, v0 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log2_fabs_v2f16: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_e64 v0, |v0| ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log2_fabs_v2f16: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_fabs_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, |v1| ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_fabs_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; 
R600-LABEL: v_log2_fabs_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_fabs_v2f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) %result = call <2 x half> @llvm.log2.v2f16(<2 x half> %fabs) ret <2 x half> %result } define <2 x half> @v_log2_fneg_fabs_v2f16(<2 x half> %in) { ; SI-SDAG-LABEL: v_log2_fneg_fabs_v2f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_fneg_fabs_v2f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; SI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_fneg_fabs_v2f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_log_f16_sdwa v1, -|v0| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_e64 v0, -|v0| ; 
VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log2_fneg_fabs_v2f16: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log2_fneg_fabs_v2f16: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_e64 v0, -|v0| ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log2_fneg_fabs_v2f16: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -|v1| ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_fneg_fabs_v2f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: 
v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_fneg_fabs_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_fneg_fabs_v2f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) %fneg.fabs = fneg <2 x half> %fabs %result = call <2 x half> @llvm.log2.v2f16(<2 x half> %fneg.fabs) ret <2 x half> %result } define <2 x half> @v_log2_fneg_v2f16(<2 x half> %in) { ; SI-SDAG-LABEL: v_log2_fneg_v2f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_fneg_v2f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; SI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; 
SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_fneg_v2f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_log_f16_sdwa v1, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_e64 v0, -v0 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log2_fneg_v2f16: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log2_fneg_v2f16: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_e64 v0, -v0 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log2_fneg_v2f16: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_fneg_v2f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_fneg_v2f16: ; 
GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_fneg_v2f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_fneg_v2f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %fneg = fneg <2 x half> %in %result = call <2 x half> @llvm.log2.v2f16(<2 x half> %fneg) ret <2 x half> %result } define <2 x half> @v_log2_v2f16_fast(<2 x half> %in) { ; SI-SDAG-LABEL: v_log2_v2f16_fast: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_v2f16_fast: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_v2f16_fast: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD 
src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log2_v2f16_fast: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log2_v2f16_fast: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log2_v2f16_fast: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_v2f16_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_v2f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_v2f16_fast: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call fast <2 x half> @llvm.log2.v2f16(<2 x half> %in) ret <2 x half> %result } define <3 x half> @v_log2_v3f16(<3 x half> %in) { ; SI-SDAG-LABEL: v_log2_v3f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: 
v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_v3f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_v3f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 ; VI-SDAG-NEXT: v_log_f16_e32 v1, v1 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log2_v3f16: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v1 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log2_v3f16: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: 
v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 ; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log2_v3f16: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v1 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_v3f16: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_v3f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_v3f16: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call <3 x half> @llvm.log2.v3f16(<3 x half> %in) ret <3 x half> %result } define <3 x half> @v_log2_v3f16_fast(<3 x half> %in) { ; SI-SDAG-LABEL: v_log2_v3f16_fast: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; 
SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_v3f16_fast: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_v3f16_fast: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 ; VI-SDAG-NEXT: v_log_f16_e32 v1, v1 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log2_v3f16_fast: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_log_f16_e32 v1, v1 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log2_v3f16_fast: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 ; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log2_v3f16_fast: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v1 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: v_log2_v3f16_fast: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX1100-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_v3f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_v3f16_fast: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call fast <3 x half> @llvm.log2.v3f16(<3 x half> %in) ret <3 x half> %result } define <4 x half> @v_log2_v4f16(<4 x half> %in) { ; SI-SDAG-LABEL: v_log2_v4f16: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_v4f16: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; 
SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 ; SI-GISEL-NEXT: v_log_f32_e32 v3, v3 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_v4f16: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_log_f16_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_sdwa v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 ; VI-SDAG-NEXT: v_log_f16_e32 v1, v1 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v3 ; VI-SDAG-NEXT: v_or_b32_e32 v1, v1, v2 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log2_v4f16: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_log_f16_e32 v3, v1 ; VI-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log2_v4f16: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 ; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX900-GISEL-LABEL: v_log2_v4f16: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_log_f16_e32 v3, v1 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v1, v3, v1 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_v4f16: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v3, v3 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_v4f16: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v3, v3 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v2 ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v1, v1, v3 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_v4f16: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_v4f16: ; CM: ; %bb.0: 
; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call <4 x half> @llvm.log2.v4f16(<4 x half> %in) ret <4 x half> %result } define <4 x half> @v_log2_v4f16_fast(<4 x half> %in) { ; SI-SDAG-LABEL: v_log2_v4f16_fast: ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_log2_v4f16_fast: ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 ; SI-GISEL-NEXT: v_log_f32_e32 v3, v3 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_v4f16_fast: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: v_log_f16_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: 
v_log_f16_sdwa v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 ; VI-SDAG-NEXT: v_log_f16_e32 v1, v1 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v3 ; VI-SDAG-NEXT: v_or_b32_e32 v1, v1, v2 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; VI-GISEL-LABEL: v_log2_v4f16_fast: ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 ; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_log_f16_e32 v3, v1 ; VI-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_log2_v4f16_fast: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 ; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3 ; GFX900-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_log2_v4f16_fast: ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_log_f16_e32 v3, v1 ; GFX900-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 ; GFX900-GISEL-NEXT: v_pack_b32_f16 v1, v3, v1 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-SDAG-LABEL: v_log2_v4f16_fast: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 
v2, 16, v0 ; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-SDAG-NEXT: v_log_f16_e32 v3, v3 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log2_v4f16_fast: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-GISEL-NEXT: v_log_f16_e32 v3, v3 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v2 ; GFX1100-GISEL-NEXT: v_pack_b32_f16 v1, v1, v3 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_log2_v4f16_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END ; R600-NEXT: PAD ; ; CM-LABEL: v_log2_v4f16_fast: ; CM: ; %bb.0: ; CM-NEXT: CF_END ; CM-NEXT: PAD %result = call fast <4 x half> @llvm.log2.v4f16(<4 x half> %in) ret <4 x half> %result } declare float @llvm.fabs.f32(float) #2 declare float @llvm.log2.f32(float) #2 declare <2 x float> @llvm.log2.v2f32(<2 x float>) #2 declare <3 x float> @llvm.log2.v3f32(<3 x float>) #2 declare <4 x float> @llvm.log2.v4f32(<4 x float>) #2 declare half @llvm.fabs.f16(half) #2 declare half @llvm.log2.f16(half) #2 declare <2 x half> @llvm.log2.v2f16(<2 x half>) #2 declare <3 x half> @llvm.log2.v3f16(<3 x half>) #2 declare <4 x half> @llvm.log2.v4f16(<4 x half>) #2 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 attributes #0 = { 
"denormal-fp-math-f32"="ieee,preserve-sign" } attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" } attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }