diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.log10.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.log10.ll | 584 |
1 files changed, 443 insertions, 141 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index 7465b49..f40316d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -203,7 +203,7 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| ; GFX1100-SDAG-NEXT: v_fma_f32 v3, 0x3e9a209a, v1, -v2 @@ -229,7 +229,7 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -316,44 +316,346 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ret void } +define amdgpu_kernel void @s_log10_contract_f32(ptr addrspace(1) %out, float %in) { +; SI-SDAG-LABEL: s_log10_contract_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb +; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 +; SI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; SI-SDAG-NEXT: s_cselect_b32 s2, 32, 0 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s6, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s4, -v2 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s4, v3 +; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: s_log10_contract_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_load_dword s0, s[4:5], 0xb +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s0, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4 +; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v4, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-GISEL-NEXT: s_endpgm +; +; VI-SDAG-LABEL: s_log10_contract_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 +; VI-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; VI-SDAG-NEXT: s_cselect_b32 s0, 32, 0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v5, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v0 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: s_log10_contract_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 +; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 +; VI-GISEL-NEXT: s_endpgm +; +; GFX900-SDAG-LABEL: s_log10_contract_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 +; GFX900-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; GFX900-SDAG-NEXT: s_cselect_b32 s2, 32, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s6, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3284fbcf +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s2, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s3, v4 +; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1] +; GFX900-SDAG-NEXT: s_endpgm +; +; GFX900-GISEL-LABEL: s_log10_contract_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, -v5 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v5, v2 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3] +; GFX900-GISEL-NEXT: s_endpgm +; +; GFX1100-SDAG-LABEL: s_log10_contract_f32: +; GFX1100-SDAG: ; %bb.0: +; GFX1100-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x411a209b, s1 +; GFX1100-SDAG-NEXT: s_and_b32 s1, s1, exec_lo +; GFX1100-SDAG-NEXT: s_cselect_b32 s1, 32, 0 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s0, s1 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX1100-SDAG-NEXT: v_fma_f32 v3, 0x3e9a209a, v1, -v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v3, v1, 0x3284fbcf, v3 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX1100-SDAG-NEXT: s_endpgm +; +; GFX1100-GISEL-LABEL: s_log10_contract_f32: +; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX1100-GISEL-NEXT: s_endpgm +; +; R600-LABEL: s_log10_contract_f32: +; R600: ; %bb.0: +; R600-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: SETGT * T0.W, literal.x, KC0[2].Z, +; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; R600-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, +; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) +; R600-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, +; R600-NEXT: LOG_IEEE * T0.X, PV.W, +; R600-NEXT: AND_INT * T1.W, PS, literal.x, +; R600-NEXT: -4096(nan), 0(0.000000e+00) +; R600-NEXT: ADD * T2.W, T0.X, -PV.W, +; R600-NEXT: MUL_IEEE * T3.W, PV.W, literal.x, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; R600-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; R600-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00) +; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W, +; R600-NEXT: SETGT * T2.W, literal.y, |T0.X|, +; R600-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; R600-NEXT: CNDE T1.W, PS, T0.X, PV.W, +; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; R600-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; R600-NEXT: ADD T0.X, PV.W, -PS, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: s_log10_contract_f32: +; CM: ; %bb.0: +; CM-NEXT: ALU 26, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z, +; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) +; CM-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x, +; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) +; CM-NEXT: MUL_IEEE * T1.W, KC0[2].Z, PV.W, +; CM-NEXT: LOG_IEEE T0.X, T1.W, +; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, +; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, +; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, +; CM-NEXT: AND_INT * T1.W, PV.X, literal.x, +; CM-NEXT: -4096(nan), 0(0.000000e+00) +; CM-NEXT: ADD * T2.W, T0.X, -PV.W, +; CM-NEXT: MUL_IEEE * T3.W, PV.W, literal.x, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T3.W, T1.W, literal.x, PV.W, +; CM-NEXT: 916096251(4.605039e-06), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE * T2.W, T2.W, literal.x, PV.W, +; CM-NEXT: 1050288128(3.010254e-01), 0(0.000000e+00) +; CM-NEXT: MULADD_IEEE T0.Z, T1.W, literal.x, PV.W, +; CM-NEXT: SETGT * T1.W, literal.y, |T0.X|, +; CM-NEXT: 1050288128(3.010254e-01), 2139095040(INF) +; CM-NEXT: CNDE T0.Z, PV.W, T0.X, PV.Z, +; CM-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x, +; CM-NEXT: 1092231323(9.632960e+00), 0(0.000000e+00) +; CM-NEXT: ADD * T0.X, PV.Z, -PV.W, +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %result = call contract float @llvm.log10.f32(float %in) + store float %result, ptr addrspace(1) %out + ret void +} + ; FIXME: We should be able to merge these packets together on Cayman so we ; have a maximum of 4 instructions. define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; SI-SDAG-LABEL: s_log10_v2f32: ; SI-SDAG: ; %bb.0: -; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 +; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b -; SI-SDAG-NEXT: s_mov_b32 s8, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s8, 0x3e9a209a ; SI-SDAG-NEXT: s_mov_b32 s9, 0x7f800000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; SI-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec -; SI-SDAG-NEXT: s_cselect_b32 s0, 32, 0 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, s0 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v3, s7, v3 +; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 +; SI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; SI-SDAG-NEXT: s_cselect_b32 s2, 32, 0 +; SI-SDAG-NEXT: v_mov_b32_e32 v3, s2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v3, s5, v3 ; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; SI-SDAG-NEXT: s_mov_b32 s0, s4 -; SI-SDAG-NEXT: s_mov_b32 s1, s5 -; SI-SDAG-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-SDAG-NEXT: s_mov_b32 s7, 0x3e9a209a +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: s_and_b64 s[6:7], vcc, exec ; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v3 -; SI-SDAG-NEXT: s_cselect_b32 s4, 32, 0 -; SI-SDAG-NEXT: v_fma_f32 v5, v3, s7, -v4 +; SI-SDAG-NEXT: s_cselect_b32 s6, 32, 0 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; SI-SDAG-NEXT: v_fma_f32 v5, v3, s8, -v4 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v1, s4 -; SI-SDAG-NEXT: v_fma_f32 v5, v3, s8, v5 -; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s6, v1 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, s6 +; SI-SDAG-NEXT: v_fma_f32 v5, v3, s5, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s4, v1 ; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 ; SI-SDAG-NEXT: v_log_f32_e32 v5, v1 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, s9 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc ; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v5 -; SI-SDAG-NEXT: v_fma_f32 v3, v5, s7, -v2 -; SI-SDAG-NEXT: v_fma_f32 v3, v5, s8, v3 +; SI-SDAG-NEXT: v_fma_f32 v3, v5, s8, -v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v5, s5, v3 ; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v5|, s9 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc @@ -600,7 +902,7 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v3 @@ -632,7 +934,7 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s3, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v3, 0x3e9a209a, v1 :: v_dual_lshlrev_b32 v0, 5, v0 ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -640,7 +942,7 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v5, 0x3284fbcf, v1 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_dual_add_f32 v3, v3, v5 :: v_dual_mul_f32 v2, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5 @@ -1153,7 +1455,7 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v6, 0x3e9a209a, v2 :: v_dual_mul_f32 v7, 0x3e9a209a, v4 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| ; GFX1100-SDAG-NEXT: v_fma_f32 v9, 0x3e9a209a, v2, -v6 @@ -1198,7 +1500,7 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v2 :: v_dual_lshlrev_b32 v0, 5, v0 ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -1207,7 +1509,7 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v8, 0x3284fbcf, v2 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v5, v5, v8 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1220,7 +1522,7 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1 ; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3e9a209a, v1, -v4 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -1876,7 +2178,7 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX1100-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_3) ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v8, 0x3e9a209a, v2 :: v_dual_mul_f32 v9, 0x3e9a209a, v3 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_dual_mul_f32 v10, 0x3e9a209a, v6 :: v_dual_mul_f32 v11, 0x3e9a209a, v7 ; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v8 ; GFX1100-SDAG-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v9 @@ -1931,7 +2233,7 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 ; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v2 ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3 @@ -1944,7 +2246,7 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v7, v7, v12 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v1 @@ -2304,7 +2606,7 @@ define float @v_log10_f32(float %in) { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -2328,7 +2630,7 @@ define float @v_log10_f32(float %in) { ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -2505,7 +2807,7 @@ define float @v_log10_fabs_f32(float %in) { ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -2528,7 +2830,7 @@ define float @v_log10_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -2707,7 +3009,7 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -2730,7 +3032,7 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -2909,7 +3211,7 @@ define float @v_log10_fneg_f32(float %in) { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -2933,7 +3235,7 @@ define float @v_log10_fneg_f32(float %in) { ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3049,7 +3351,7 @@ define float @v_log10_f32_fast(float %in) { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3059,7 +3361,7 @@ define float @v_log10_f32_fast(float %in) { ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -3226,7 +3528,7 @@ define float @v_log10_f32_ninf(float %in) { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3250,7 +3552,7 @@ define float @v_log10_f32_ninf(float %in) { ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -3364,7 +3666,7 @@ define float @v_log10_f32_afn(float %in) { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3374,7 +3676,7 @@ define float @v_log10_f32_afn(float %in) { ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -3403,7 +3705,7 @@ define float @v_log10_f32_afn_daz(float %in) #0 { ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -3508,7 +3810,7 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3518,7 +3820,7 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 { ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -3624,7 +3926,7 @@ define float @v_fabs_log10_f32_afn(float %in) { ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3635,7 +3937,7 @@ define float @v_fabs_log10_f32_afn(float %in) { ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, s0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -3754,7 +4056,7 @@ define float @v_log10_f32_daz(float %in) #0 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3769,7 +4071,7 @@ define float @v_log10_f32_daz(float %in) #0 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3943,7 +4245,7 @@ define float @v_log10_f32_nnan(float %in) { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3967,7 +4269,7 @@ define float @v_log10_f32_nnan(float %in) { ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -4094,7 +4396,7 @@ define float @v_log10_f32_nnan_daz(float %in) #0 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -4109,7 +4411,7 @@ define float @v_log10_f32_nnan_daz(float %in) #0 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -4283,7 +4585,7 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -4307,7 +4609,7 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -4434,7 +4736,7 @@ define float @v_log10_f32_ninf_daz(float %in) #0 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -4449,7 +4751,7 @@ define float @v_log10_f32_ninf_daz(float %in) #0 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -4623,7 +4925,7 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -4647,7 +4949,7 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -4805,7 +5107,7 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -4826,7 +5128,7 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -4918,7 +5220,7 @@ define float @v_log10_f32_nnan_ninf_daz(float %in) #0 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -4931,7 +5233,7 @@ define float @v_log10_f32_nnan_ninf_daz(float %in) #0 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -5085,7 +5387,7 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -5106,7 +5408,7 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -5142,7 +5444,7 @@ define float @v_log10_f32_fast_daz(float %in) #0 { ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; @@ -5309,7 +5611,7 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -5333,7 +5635,7 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -5490,7 +5792,7 @@ define float @v_log10_f32_0() { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 -; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -5507,7 +5809,7 @@ define float @v_log10_f32_0() { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, 0 -; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -5642,7 +5944,7 @@ define float @v_log10_f32_from_fpext_f16(i16 %src.i) { ; GFX1100-SDAG-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-TRUE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -5659,7 +5961,7 @@ define float @v_log10_f32_from_fpext_f16(i16 %src.i) { ; GFX1100-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-FAKE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -5676,7 +5978,7 @@ define float @v_log10_f32_from_fpext_f16(i16 %src.i) { ; GFX1100-GISEL-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-TRUE16-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-TRUE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -5693,7 +5995,7 @@ define float @v_log10_f32_from_fpext_f16(i16 %src.i) { ; GFX1100-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-FAKE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -5846,7 +6148,7 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l ; GFX1100-SDAG-TRUE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -5864,7 +6166,7 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-SDAG-FAKE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -5882,7 +6184,7 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-TRUE16-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -5900,7 +6202,7 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -6011,7 +6313,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) { ; GFX1100-SDAG-TRUE16-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-TRUE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -6034,7 +6336,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) { ; GFX1100-SDAG-FAKE16-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-FAKE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -6058,7 +6360,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) { ; GFX1100-GISEL-TRUE16-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-GISEL-TRUE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -6081,7 +6383,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) { ; GFX1100-GISEL-FAKE16-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-GISEL-FAKE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -6147,7 +6449,7 @@ define half @v_log10_f16(half %in) { ; GFX1100-SDAG-TRUE16: ; %bb.0: ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6155,7 +6457,7 @@ define half @v_log10_f16(half %in) { ; GFX1100-SDAG-FAKE16: ; %bb.0: ; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6163,7 +6465,7 @@ define half @v_log10_f16(half %in) { ; GFX1100-GISEL-TRUE16: ; %bb.0: ; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6171,7 +6473,7 @@ define half @v_log10_f16(half %in) { ; GFX1100-GISEL-FAKE16: ; %bb.0: ; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6227,7 +6529,7 @@ define half @v_log10_fabs_f16(half %in) { ; GFX1100-SDAG-TRUE16: ; %bb.0: ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e64 v0.l, |v0.l| -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6235,7 +6537,7 @@ define half @v_log10_fabs_f16(half %in) { ; GFX1100-SDAG-FAKE16: ; %bb.0: ; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e64 v0, |v0| -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6243,7 +6545,7 @@ define half @v_log10_fabs_f16(half %in) { ; GFX1100-GISEL-TRUE16: ; %bb.0: ; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e64 v0.l, |v0.l| -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6251,7 +6553,7 @@ define half @v_log10_fabs_f16(half %in) { ; GFX1100-GISEL-FAKE16: ; %bb.0: ; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e64 v0, |v0| -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6308,7 +6610,7 @@ define half @v_log10_fneg_fabs_f16(half %in) { ; GFX1100-SDAG-TRUE16: ; %bb.0: ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e64 v0.l, -|v0.l| -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6316,7 +6618,7 @@ define half @v_log10_fneg_fabs_f16(half %in) { ; GFX1100-SDAG-FAKE16: ; %bb.0: ; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e64 v0, -|v0| -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6324,7 +6626,7 @@ define half @v_log10_fneg_fabs_f16(half %in) { ; GFX1100-GISEL-TRUE16: ; %bb.0: ; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e64 v0.l, -|v0.l| -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6332,7 +6634,7 @@ define half @v_log10_fneg_fabs_f16(half %in) { ; GFX1100-GISEL-FAKE16: ; %bb.0: ; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e64 v0, -|v0| -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6390,7 +6692,7 @@ define half @v_log10_fneg_f16(half %in) { ; GFX1100-SDAG-TRUE16: ; %bb.0: ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e64 v0.l, -v0.l -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6398,7 +6700,7 @@ define half @v_log10_fneg_f16(half %in) { ; GFX1100-SDAG-FAKE16: ; %bb.0: ; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e64 v0, -v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6406,7 +6708,7 @@ define half @v_log10_fneg_f16(half %in) { ; GFX1100-GISEL-TRUE16: ; %bb.0: ; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e64 v0.l, -v0.l -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6414,7 +6716,7 @@ define half @v_log10_fneg_f16(half %in) { ; GFX1100-GISEL-FAKE16: ; %bb.0: ; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e64 v0, -v0 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6469,7 +6771,7 @@ define half @v_log10_f16_fast(half %in) { ; GFX1100-SDAG-TRUE16: ; %bb.0: ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6477,7 +6779,7 @@ define half @v_log10_f16_fast(half %in) { ; GFX1100-SDAG-FAKE16: ; %bb.0: ; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6485,7 +6787,7 @@ define half @v_log10_f16_fast(half %in) { ; GFX1100-GISEL-TRUE16: ; %bb.0: ; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6493,7 +6795,7 @@ define half @v_log10_f16_fast(half %in) { ; GFX1100-GISEL-FAKE16: ; %bb.0: ; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6578,7 +6880,7 @@ define <2 x half> @v_log10_v2f16(<2 x half> %in) { ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6592,7 +6894,7 @@ define <2 x half> @v_log10_v2f16(<2 x half> %in) { ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v1, v1 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 @@ -6603,7 +6905,7 @@ define <2 x half> @v_log10_v2f16(<2 x half> %in) { ; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6617,7 +6919,7 @@ define <2 x half> @v_log10_v2f16(<2 x half> %in) { ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 @@ -6721,7 +7023,7 @@ define <2 x half> @v_log10_fabs_v2f16(<2 x half> %in) { ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e64 v0.l, |v0.l| ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e64 v0.h, |v0.h| -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6735,7 +7037,7 @@ define <2 x half> @v_log10_fabs_v2f16(<2 x half> %in) { ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e64 v0, |v0| ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e64 v1, |v1| -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 @@ -6748,7 +7050,7 @@ define <2 x half> @v_log10_fabs_v2f16(<2 x half> %in) { ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6763,7 +7065,7 @@ define <2 x half> @v_log10_fabs_v2f16(<2 x half> %in) { ; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6873,7 +7175,7 @@ define <2 x half> @v_log10_fneg_fabs_v2f16(<2 x half> %in) { ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e64 v0.l, -|v0.l| ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e64 v0.h, -|v0.h| -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6887,7 +7189,7 @@ define <2 x half> @v_log10_fneg_fabs_v2f16(<2 x half> %in) { ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e64 v0, -|v0| ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e64 v1, -|v1| -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 @@ -6900,7 +7202,7 @@ define <2 x half> @v_log10_fneg_fabs_v2f16(<2 x half> %in) { ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6915,7 +7217,7 @@ define <2 x half> @v_log10_fneg_fabs_v2f16(<2 x half> %in) { ; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7026,7 +7328,7 @@ define <2 x half> @v_log10_fneg_v2f16(<2 x half> %in) { ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e64 v0.l, -v0.l ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e64 v0.h, -v0.h -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7040,7 +7342,7 @@ define <2 x half> @v_log10_fneg_v2f16(<2 x half> %in) { ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e64 v0, -v0 ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e64 v1, -v1 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 @@ -7053,7 +7355,7 @@ define <2 x half> @v_log10_fneg_v2f16(<2 x half> %in) { ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7068,7 +7370,7 @@ define <2 x half> @v_log10_fneg_v2f16(<2 x half> %in) { ; GFX1100-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7153,7 +7455,7 @@ define <2 x half> @v_log10_v2f16_fast(<2 x half> %in) { ; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7167,7 +7469,7 @@ define <2 x half> @v_log10_v2f16_fast(<2 x half> %in) { ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v1, v1 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 @@ -7178,7 +7480,7 @@ define <2 x half> @v_log10_v2f16_fast(<2 x half> %in) { ; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v0.h, v0.h -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7192,7 +7494,7 @@ define <2 x half> @v_log10_v2f16_fast(<2 x half> %in) { ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v0, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v1, v1 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 @@ -7285,7 +7587,7 @@ define <3 x half> @v_log10_v3f16(<3 x half> %in) { ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x34d1, v1.l ; GFX1100-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h @@ -7300,7 +7602,7 @@ define <3 x half> @v_log10_v3f16(<3 x half> %in) { ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7315,7 +7617,7 @@ define <3 x half> @v_log10_v3f16(<3 x half> %in) { ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x34d1, v1.l ; GFX1100-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h @@ -7330,7 +7632,7 @@ define <3 x half> @v_log10_v3f16(<3 x half> %in) { ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7418,7 +7720,7 @@ define <3 x half> @v_log10_v3f16_fast(<3 x half> %in) { ; GFX1100-SDAG-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x34d1, v1.l ; GFX1100-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h @@ -7433,7 +7735,7 @@ define <3 x half> @v_log10_v3f16_fast(<3 x half> %in) { ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3) ; GFX1100-SDAG-FAKE16-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7448,7 +7750,7 @@ define <3 x half> @v_log10_v3f16_fast(<3 x half> %in) { ; GFX1100-GISEL-TRUE16-NEXT: v_log_f16_e32 v1.l, v1.l ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x34d1, v1.l ; GFX1100-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h @@ -7463,7 +7765,7 @@ define <3 x half> @v_log10_v3f16_fast(<3 x half> %in) { ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3) ; GFX1100-GISEL-FAKE16-NEXT: v_log_f16_e32 v2, v2 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7606,7 +7908,7 @@ define <4 x half> @v_log10_v4f16(<4 x half> %in) { ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x34d1, v1.l ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.h, 0x34d1, v1.h ; GFX1100-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h @@ -7627,7 +7929,7 @@ define <4 x half> @v_log10_v4f16(<4 x half> %in) { ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v3, 0x34d1, v3 ; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 @@ -7645,7 +7947,7 @@ define <4 x half> @v_log10_v4f16(<4 x half> %in) { ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_2) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x34d1, v1.l ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v1.h, 0x34d1, v1.h ; GFX1100-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h @@ -7665,7 +7967,7 @@ define <4 x half> @v_log10_v4f16(<4 x half> %in) { ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v3, 0x34d1, v3 ; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 @@ -7801,7 +8103,7 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) { ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x34d1, v1.l ; GFX1100-SDAG-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l -; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h ; GFX1100-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.h, 0x34d1, v1.h ; GFX1100-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h @@ -7822,7 +8124,7 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) { ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 ; GFX1100-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 -; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 ; GFX1100-SDAG-FAKE16-NEXT: v_mul_f16_e32 v3, 0x34d1, v3 ; GFX1100-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 @@ -7840,7 +8142,7 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) { ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x34d1, v0.l ; GFX1100-GISEL-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_2) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v0.h, 0x34d1, v0.h -; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-TRUE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x34d1, v1.l ; GFX1100-GISEL-TRUE16-NEXT: v_mul_f16_e32 v1.h, 0x34d1, v1.h ; GFX1100-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h @@ -7860,7 +8162,7 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) { ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, 0x34d1, v0 ; GFX1100-GISEL-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v1, 0x34d1, v1 -; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-FAKE16-NEXT: s_waitcnt_depctr depctr_va_vdst(0) ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v2, 0x34d1, v2 ; GFX1100-GISEL-FAKE16-NEXT: v_mul_f16_e32 v3, 0x34d1, v3 ; GFX1100-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2 |
