; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX10
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX12

; Each function below evaluates the fourteen fcmp predicates
; oeq, ogt, oge, olt, ole, one, ord, ueq, ugt, uge, ult, ule, une and uno on
; the same pair of operands, zero-extends every i1 result to i32, adds the
; fourteen values together and stores the sum through %p, so every compare
; result feeds the stored value.
;
; The assertion lines are generated output (see the NOTE on the first line);
; regenerate them with the script rather than editing them by hand.

; half operands passed `inreg`. The expected code compares scalar registers:
; v_cmp_*_f16_e64 writing an SGPR for gfx1010, s_cmp_*_f16 followed by
; s_cselect_b32 for gfx1200.
define void @fcmp_f16_uniform(half inreg %a, half inreg %b, ptr %p) {
; GFX10-LABEL: fcmp_f16_uniform:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_f16_e64 s4, s16, s17
; GFX10-NEXT:    v_cmp_gt_f16_e64 s5, s16, s17
; GFX10-NEXT:    v_cmp_ge_f16_e64 s6, s16, s17
; GFX10-NEXT:    v_cmp_lt_f16_e64 s7, s16, s17
; GFX10-NEXT:    v_cmp_le_f16_e64 s8, s16, s17
; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
; GFX10-NEXT:    v_cmp_lg_f16_e64 s9, s16, s17
; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
; GFX10-NEXT:    v_cmp_o_f16_e64 s10, s16, s17
; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
; GFX10-NEXT:    v_cmp_nlg_f16_e64 s11, s16, s17
; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
; GFX10-NEXT:    v_cmp_nle_f16_e64 s12, s16, s17
; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
; GFX10-NEXT:    v_cmp_nlt_f16_e64 s13, s16, s17
; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
; GFX10-NEXT:    v_cmp_nge_f16_e64 s14, s16, s17
; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
; GFX10-NEXT:    v_cmp_ngt_f16_e64 s15, s16, s17
; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
; GFX10-NEXT:    v_cmp_neq_f16_e64 s18, s16, s17
; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
; GFX10-NEXT:    v_cmp_u_f16_e64 s16, s16, s17
; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s18, 0
; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
; GFX10-NEXT:    s_add_i32 s4, s4, s5
; GFX10-NEXT:    s_add_i32 s4, s4, s6
; GFX10-NEXT:    s_add_i32 s4, s4, s7
; GFX10-NEXT:    s_add_i32 s4, s4, s8
; GFX10-NEXT:    s_add_i32 s4, s4, s9
; GFX10-NEXT:    s_add_i32 s4, s4, s10
; GFX10-NEXT:    s_add_i32 s4, s4, s11
; GFX10-NEXT:    s_add_i32 s4, s4, s12
; GFX10-NEXT:    s_add_i32 s4, s4, s13
; GFX10-NEXT:    s_add_i32 s4, s4, s14
; GFX10-NEXT:    s_add_i32 s4, s4, s15
; GFX10-NEXT:    s_add_i32 s4, s4, s17
; GFX10-NEXT:    s_add_i32 s4, s4, s16
; GFX10-NEXT:    v_mov_b32_e32 v2, s4
; GFX10-NEXT:    flat_store_dword v[0:1], v2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: fcmp_f16_uniform:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_cmp_eq_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
; GFX12-NEXT:    s_cmp_gt_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
; GFX12-NEXT:    s_cmp_ge_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
; GFX12-NEXT:    s_cmp_lt_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
; GFX12-NEXT:    s_cmp_le_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s6, 1, 0
; GFX12-NEXT:    s_cmp_lg_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s7, 1, 0
; GFX12-NEXT:    s_cmp_o_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s8, 1, 0
; GFX12-NEXT:    s_cmp_nlg_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s9, 1, 0
; GFX12-NEXT:    s_cmp_nle_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s10, 1, 0
; GFX12-NEXT:    s_cmp_nlt_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s11, 1, 0
; GFX12-NEXT:    s_cmp_nge_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s12, 1, 0
; GFX12-NEXT:    s_cmp_ngt_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s13, 1, 0
; GFX12-NEXT:    s_cmp_neq_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s14, 1, 0
; GFX12-NEXT:    s_cmp_u_f16 s0, s1
; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cselect_b32 s1, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s3, 0
; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s4, 0
; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s5, 0
; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s6, 0
; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s7, 0
; GFX12-NEXT:    s_cselect_b32 s6, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s8, 0
; GFX12-NEXT:    s_cselect_b32 s7, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s9, 0
; GFX12-NEXT:    s_cselect_b32 s8, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s10, 0
; GFX12-NEXT:    s_cselect_b32 s9, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s11, 0
; GFX12-NEXT:    s_cselect_b32 s10, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s12, 0
; GFX12-NEXT:    s_cselect_b32 s11, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s13, 0
; GFX12-NEXT:    s_cselect_b32 s12, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s14, 0
; GFX12-NEXT:    s_cselect_b32 s13, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s2
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s3
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s4
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s5
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s6
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s7
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s8
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s9
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s10
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s11
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s12
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s13
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s0, s1, s0
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    v_mov_b32_e32 v2, s0
; GFX12-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %oeq_result = fcmp oeq half %a, %b
  %ogt_result = fcmp ogt half %a, %b
  %oge_result = fcmp oge half %a, %b
  %olt_result = fcmp olt half %a, %b
  %ole_result = fcmp ole half %a, %b
  %one_result = fcmp one half %a, %b
  %ord_result = fcmp ord half %a, %b
  %ueq_result = fcmp ueq half %a, %b
  %ugt_result = fcmp ugt half %a, %b
  %uge_result = fcmp uge half %a, %b
  %ult_result = fcmp ult half %a, %b
  %ule_result = fcmp ule half %a, %b
  %une_result = fcmp une half %a, %b
  %uno_result = fcmp uno half %a, %b
  %oeq_zext = zext i1 %oeq_result to i32
  %ogt_zext = zext i1 %ogt_result to i32
  %oge_zext = zext i1 %oge_result to i32
  %olt_zext = zext i1 %olt_result to i32
  %ole_zext = zext i1 %ole_result to i32
  %one_zext = zext i1 %one_result to i32
  %ord_zext = zext i1 %ord_result to i32
  %ueq_zext = zext i1 %ueq_result to i32
  %ugt_zext = zext i1 %ugt_result to i32
  %uge_zext = zext i1 %uge_result to i32
  %ult_zext = zext i1 %ult_result to i32
  %ule_zext = zext i1 %ule_result to i32
  %une_zext = zext i1 %une_result to i32
  %uno_zext = zext i1 %uno_result to i32
  %sum1 = add i32 %oeq_zext, %ogt_zext
  %sum2 = add i32 %sum1, %oge_zext
  %sum3 = add i32 %sum2, %olt_zext
  %sum4 = add i32 %sum3, %ole_zext
  %sum5 = add i32 %sum4, %one_zext
  %sum6 = add i32 %sum5, %ord_zext
  %sum7 = add i32 %sum6, %ueq_zext
  %sum8 = add i32 %sum7, %ugt_zext
  %sum9 = add i32 %sum8, %uge_zext
  %sum10 = add i32 %sum9, %ult_zext
  %sum11 = add i32 %sum10, %ule_zext
  %sum12 = add i32 %sum11, %une_zext
  %result = add i32 %sum12, %uno_zext
  store i32 %result, ptr %p
  ret void
}

; half operands passed without `inreg`. The expected code compares vector
; registers with v_cmp_*_f16_e32 into vcc_lo and materialises each result
; with v_cndmask_b32_e64; the gfx1200 output names the operands v0.l / v1.l.
define void @fcmp_f16_divergent(half %a, half %b, ptr %p) {
; GFX10-LABEL: fcmp_f16_divergent:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_gt_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_ge_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add_nc_u32_e32 v4, v4, v5
; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_le_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v4, v4, v6, v7
; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_lg_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v4, v4, v5, v8
; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_nlg_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v4, v4, v6, v7
; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_nge_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v4, v4, v5, v8
; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_neq_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v1, v4, v6, v7
; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT:    v_add3_u32 v0, v1, v5, v0
; GFX10-NEXT:    flat_store_dword v[2:3], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: fcmp_f16_divergent:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_cmp_eq_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_gt_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_ge_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add_nc_u32_e32 v4, v4, v5
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_le_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add3_u32 v4, v4, v6, v7
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_lg_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add3_u32 v4, v4, v5, v8
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_nlg_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_nle_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add3_u32 v4, v4, v6, v7
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_nge_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add3_u32 v4, v4, v5, v8
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_neq_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v1.l
; GFX12-NEXT:    v_add3_u32 v1, v4, v6, v7
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_add3_u32 v0, v1, v5, v0
; GFX12-NEXT:    flat_store_b32 v[2:3], v0
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %oeq_result = fcmp oeq half %a, %b
  %ogt_result = fcmp ogt half %a, %b
  %oge_result = fcmp oge half %a, %b
  %olt_result = fcmp olt half %a, %b
  %ole_result = fcmp ole half %a, %b
  %one_result = fcmp one half %a, %b
  %ord_result = fcmp ord half %a, %b
  %ueq_result = fcmp ueq half %a, %b
  %ugt_result = fcmp ugt half %a, %b
  %uge_result = fcmp uge half %a, %b
  %ult_result = fcmp ult half %a, %b
  %ule_result = fcmp ule half %a, %b
  %une_result = fcmp une half %a, %b
  %uno_result = fcmp uno half %a, %b
  %oeq_zext = zext i1 %oeq_result to i32
  %ogt_zext = zext i1 %ogt_result to i32
  %oge_zext = zext i1 %oge_result to i32
  %olt_zext = zext i1 %olt_result to i32
  %ole_zext = zext i1 %ole_result to i32
  %one_zext = zext i1 %one_result to i32
  %ord_zext = zext i1 %ord_result to i32
  %ueq_zext = zext i1 %ueq_result to i32
  %ugt_zext = zext i1 %ugt_result to i32
  %uge_zext = zext i1 %uge_result to i32
  %ult_zext = zext i1 %ult_result to i32
  %ule_zext = zext i1 %ule_result to i32
  %une_zext = zext i1 %une_result to i32
  %uno_zext = zext i1 %uno_result to i32
  %sum1 = add i32 %oeq_zext, %ogt_zext
  %sum2 = add i32 %sum1, %oge_zext
  %sum3 = add i32 %sum2, %olt_zext
  %sum4 = add i32 %sum3, %ole_zext
  %sum5 = add i32 %sum4, %one_zext
  %sum6 = add i32 %sum5, %ord_zext
  %sum7 = add i32 %sum6, %ueq_zext
  %sum8 = add i32 %sum7, %ugt_zext
  %sum9 = add i32 %sum8, %uge_zext
  %sum10 = add i32 %sum9, %ult_zext
  %sum11 = add i32 %sum10, %ule_zext
  %sum12 = add i32 %sum11, %une_zext
  %result = add i32 %sum12, %uno_zext
  store i32 %result, ptr %p
  ret void
}

; float operands passed `inreg`. Same shape as the half/inreg case:
; v_cmp_*_f32_e64 writing an SGPR for gfx1010, s_cmp_*_f32 followed by
; s_cselect_b32 for gfx1200.
define void @fcmp_f32_uniform(float inreg %a, float inreg %b, ptr %p) {
; GFX10-LABEL: fcmp_f32_uniform:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_f32_e64 s4, s16, s17
; GFX10-NEXT:    v_cmp_gt_f32_e64 s5, s16, s17
; GFX10-NEXT:    v_cmp_ge_f32_e64 s6, s16, s17
; GFX10-NEXT:    v_cmp_lt_f32_e64 s7, s16, s17
; GFX10-NEXT:    v_cmp_le_f32_e64 s8, s16, s17
; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
; GFX10-NEXT:    v_cmp_lg_f32_e64 s9, s16, s17
; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
; GFX10-NEXT:    v_cmp_o_f32_e64 s10, s16, s17
; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
; GFX10-NEXT:    v_cmp_nlg_f32_e64 s11, s16, s17
; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
; GFX10-NEXT:    v_cmp_nle_f32_e64 s12, s16, s17
; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
; GFX10-NEXT:    v_cmp_nlt_f32_e64 s13, s16, s17
; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
; GFX10-NEXT:    v_cmp_nge_f32_e64 s14, s16, s17
; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
; GFX10-NEXT:    v_cmp_ngt_f32_e64 s15, s16, s17
; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
; GFX10-NEXT:    v_cmp_neq_f32_e64 s18, s16, s17
; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
; GFX10-NEXT:    v_cmp_u_f32_e64 s16, s16, s17
; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s18, 0
; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s4, 0
; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
; GFX10-NEXT:    s_add_i32 s4, s4, s5
; GFX10-NEXT:    s_add_i32 s4, s4, s6
; GFX10-NEXT:    s_add_i32 s4, s4, s7
; GFX10-NEXT:    s_add_i32 s4, s4, s8
; GFX10-NEXT:    s_add_i32 s4, s4, s9
; GFX10-NEXT:    s_add_i32 s4, s4, s10
; GFX10-NEXT:    s_add_i32 s4, s4, s11
; GFX10-NEXT:    s_add_i32 s4, s4, s12
; GFX10-NEXT:    s_add_i32 s4, s4, s13
; GFX10-NEXT:    s_add_i32 s4, s4, s14
; GFX10-NEXT:    s_add_i32 s4, s4, s15
; GFX10-NEXT:    s_add_i32 s4, s4, s17
; GFX10-NEXT:    s_add_i32 s4, s4, s16
; GFX10-NEXT:    v_mov_b32_e32 v2, s4
; GFX10-NEXT:    flat_store_dword v[0:1], v2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: fcmp_f32_uniform:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_cmp_eq_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
; GFX12-NEXT:    s_cmp_gt_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
; GFX12-NEXT:    s_cmp_ge_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
; GFX12-NEXT:    s_cmp_lt_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
; GFX12-NEXT:    s_cmp_le_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s6, 1, 0
; GFX12-NEXT:    s_cmp_lg_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s7, 1, 0
; GFX12-NEXT:    s_cmp_o_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s8, 1, 0
; GFX12-NEXT:    s_cmp_nlg_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s9, 1, 0
; GFX12-NEXT:    s_cmp_nle_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s10, 1, 0
; GFX12-NEXT:    s_cmp_nlt_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s11, 1, 0
; GFX12-NEXT:    s_cmp_nge_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s12, 1, 0
; GFX12-NEXT:    s_cmp_ngt_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s13, 1, 0
; GFX12-NEXT:    s_cmp_neq_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s14, 1, 0
; GFX12-NEXT:    s_cmp_u_f32 s0, s1
; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cselect_b32 s1, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s3, 0
; GFX12-NEXT:    s_cselect_b32 s2, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s4, 0
; GFX12-NEXT:    s_cselect_b32 s3, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s5, 0
; GFX12-NEXT:    s_cselect_b32 s4, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s6, 0
; GFX12-NEXT:    s_cselect_b32 s5, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s7, 0
; GFX12-NEXT:    s_cselect_b32 s6, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s8, 0
; GFX12-NEXT:    s_cselect_b32 s7, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s9, 0
; GFX12-NEXT:    s_cselect_b32 s8, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s10, 0
; GFX12-NEXT:    s_cselect_b32 s9, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s11, 0
; GFX12-NEXT:    s_cselect_b32 s10, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s12, 0
; GFX12-NEXT:    s_cselect_b32 s11, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s13, 0
; GFX12-NEXT:    s_cselect_b32 s12, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s14, 0
; GFX12-NEXT:    s_cselect_b32 s13, 1, 0
; GFX12-NEXT:    s_cmp_lg_u32 s0, 0
; GFX12-NEXT:    s_cselect_b32 s0, 1, 0
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s2
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s3
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s4
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s5
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s6
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s7
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s8
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s9
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s10
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s11
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s12
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s1, s1, s13
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    s_add_co_i32 s0, s1, s0
; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT:    v_mov_b32_e32 v2, s0
; GFX12-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %oeq_result = fcmp oeq float %a, %b
  %ogt_result = fcmp ogt float %a, %b
  %oge_result = fcmp oge float %a, %b
  %olt_result = fcmp olt float %a, %b
  %ole_result = fcmp ole float %a, %b
  %one_result = fcmp one float %a, %b
  %ord_result = fcmp ord float %a, %b
  %ueq_result = fcmp ueq float %a, %b
  %ugt_result = fcmp ugt float %a, %b
  %uge_result = fcmp uge float %a, %b
  %ult_result = fcmp ult float %a, %b
  %ule_result = fcmp ule float %a, %b
  %une_result = fcmp une float %a, %b
  %uno_result = fcmp uno float %a, %b
  %oeq_zext = zext i1 %oeq_result to i32
  %ogt_zext = zext i1 %ogt_result to i32
  %oge_zext = zext i1 %oge_result to i32
  %olt_zext = zext i1 %olt_result to i32
  %ole_zext = zext i1 %ole_result to i32
  %one_zext = zext i1 %one_result to i32
  %ord_zext = zext i1 %ord_result to i32
  %ueq_zext = zext i1 %ueq_result to i32
  %ugt_zext = zext i1 %ugt_result to i32
  %uge_zext = zext i1 %uge_result to i32
  %ult_zext = zext i1 %ult_result to i32
  %ule_zext = zext i1 %ule_result to i32
  %une_zext = zext i1 %une_result to i32
  %uno_zext = zext i1 %uno_result to i32
  %sum1 = add i32 %oeq_zext, %ogt_zext
  %sum2 = add i32 %sum1, %oge_zext
  %sum3 = add i32 %sum2, %olt_zext
  %sum4 = add i32 %sum3, %ole_zext
  %sum5 = add i32 %sum4, %one_zext
  %sum6 = add i32 %sum5, %ord_zext
  %sum7 = add i32 %sum6, %ueq_zext
  %sum8 = add i32 %sum7, %ugt_zext
  %sum9 = add i32 %sum8, %uge_zext
  %sum10 = add i32 %sum9, %ult_zext
  %sum11 = add i32 %sum10, %ule_zext
  %sum12 = add i32 %sum11, %une_zext
  %result = add i32 %sum12, %uno_zext
  store i32 %result, ptr %p
  ret void
}

; float operands passed without `inreg`. The expected code compares vector
; registers with v_cmp_*_f32_e32 into vcc_lo and materialises each result
; with v_cndmask_b32_e64 on both targets.
define void @fcmp_f32_divergent(float %a, float %b, ptr %p) {
; GFX10-LABEL: fcmp_f32_divergent:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add_nc_u32_e32 v4, v4, v5
; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_le_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v4, v4, v6, v7
; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_lg_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v4, v4, v5, v8
; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_nlg_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_nle_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v4, v4, v6, v7
; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_nge_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v4, v4, v5, v8
; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_neq_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX10-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_add3_u32 v1, v4, v6, v7
; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT:    v_add3_u32 v0, v1, v5, v0
; GFX10-NEXT:    flat_store_dword v[2:3], v0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: fcmp_f32_divergent:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_cmp_eq_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add_nc_u32_e32 v4, v4, v5
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_le_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add3_u32 v4, v4, v6, v7
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_lg_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_o_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add3_u32 v4, v4, v5, v8
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_nlg_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_nle_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add3_u32 v4, v4, v6, v7
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_nge_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_add3_u32 v4, v4, v5, v8
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_neq_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v1
; GFX12-NEXT:    v_add3_u32 v1, v4, v6, v7
; GFX12-NEXT:    s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_add3_u32 v0, v1, v5, v0
; GFX12-NEXT:    flat_store_b32 v[2:3], v0
; GFX12-NEXT:    s_wait_dscnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %oeq_result = fcmp oeq float %a, %b
  %ogt_result = fcmp ogt float %a, %b
  %oge_result = fcmp oge float %a, %b
  %olt_result = fcmp olt float %a, %b
  %ole_result = fcmp ole float %a, %b
  %one_result = fcmp one float %a, %b
  %ord_result = fcmp ord float %a, %b
  %ueq_result = fcmp ueq float %a, %b
  %ugt_result = fcmp ugt float %a, %b
  %uge_result = fcmp uge float %a, %b
  %ult_result = fcmp ult float %a, %b
  %ule_result = fcmp ule float %a, %b
  %une_result = fcmp une float %a, %b
  %uno_result = fcmp uno float %a, %b
  %oeq_zext = zext i1 %oeq_result to i32
  %ogt_zext = zext i1 %ogt_result to i32
  %oge_zext = zext i1 %oge_result to i32
  %olt_zext = zext i1 %olt_result to i32
  %ole_zext = zext i1 %ole_result to i32
  %one_zext = zext i1 %one_result to i32
  %ord_zext = zext i1 %ord_result to i32
  %ueq_zext = zext i1 %ueq_result to i32
  %ugt_zext = zext i1 %ugt_result to i32
  %uge_zext = zext i1 %uge_result to i32
  %ult_zext = zext i1 %ult_result to i32
  %ule_zext = zext i1 %ule_result to i32
  %une_zext = zext i1 %une_result to i32
  %uno_zext = zext i1 %uno_result to i32
  %sum1 = add i32 %oeq_zext, %ogt_zext
  %sum2 = add i32 %sum1, %oge_zext
  %sum3 = add i32 %sum2, %olt_zext
  %sum4 = add i32 %sum3, %ole_zext
  %sum5 = add i32 %sum4, %one_zext
  %sum6 = add i32 %sum5, %ord_zext
  %sum7 = add i32 %sum6, %ueq_zext
  %sum8 = add i32 %sum7, %ugt_zext
  %sum9 = add i32 %sum8, %uge_zext
  %sum10 = add i32 %sum9, %ult_zext
  %sum11 = add i32 %sum10, %ule_zext
  %sum12 = add i32 %sum11, %une_zext
  %result = add i32 %sum12, %uno_zext
  store i32 %result, ptr %p
  ret void
}

define void @fcmp_f64_uniform(double inreg %a, double inreg %b, ptr %p) { ; GFX10-LABEL: fcmp_f64_uniform: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_f64_e64 s4, s[16:17], s[18:19] ; 
GFX10-NEXT: v_cmp_gt_f64_e64 s5, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_ge_f64_e64 s6, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_lt_f64_e64 s7, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_le_f64_e64 s8, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_lg_f64_e64 s9, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_o_f64_e64 s10, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_nlg_f64_e64 s11, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_nle_f64_e64 s12, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_nlt_f64_e64 s13, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_nge_f64_e64 s14, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_ngt_f64_e64 s15, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_neq_f64_e64 s20, s[16:17], s[18:19] ; GFX10-NEXT: v_cmp_u_f64_e64 s16, s[16:17], s[18:19] ; GFX10-NEXT: s_cmp_lg_u32 s4, 0 ; GFX10-NEXT: s_cselect_b32 s4, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s5, 0 ; GFX10-NEXT: s_cselect_b32 s5, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s6, 0 ; GFX10-NEXT: s_cselect_b32 s6, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s7, 0 ; GFX10-NEXT: s_cselect_b32 s7, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s8, 0 ; GFX10-NEXT: s_cselect_b32 s8, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s9, 0 ; GFX10-NEXT: s_cselect_b32 s9, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s10, 0 ; GFX10-NEXT: s_cselect_b32 s10, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0 ; GFX10-NEXT: s_cselect_b32 s11, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s14, 0 ; GFX10-NEXT: s_cselect_b32 s14, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0 ; GFX10-NEXT: s_cselect_b32 s15, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s20, 0 ; GFX10-NEXT: s_cselect_b32 s17, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s16, 0 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s4, 0 ; GFX10-NEXT: s_cselect_b32 s4, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s5, 0 ; GFX10-NEXT: s_cselect_b32 s5, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s6, 0 ; GFX10-NEXT: s_cselect_b32 s6, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s7, 0 ; GFX10-NEXT: s_cselect_b32 s7, 1, 0 ; 
GFX10-NEXT: s_cmp_lg_u32 s8, 0 ; GFX10-NEXT: s_cselect_b32 s8, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s9, 0 ; GFX10-NEXT: s_cselect_b32 s9, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s10, 0 ; GFX10-NEXT: s_cselect_b32 s10, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0 ; GFX10-NEXT: s_cselect_b32 s11, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s14, 0 ; GFX10-NEXT: s_cselect_b32 s14, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0 ; GFX10-NEXT: s_cselect_b32 s15, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s17, 0 ; GFX10-NEXT: s_cselect_b32 s17, 1, 0 ; GFX10-NEXT: s_cmp_lg_u32 s16, 0 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0 ; GFX10-NEXT: s_add_i32 s4, s4, s5 ; GFX10-NEXT: s_add_i32 s4, s4, s6 ; GFX10-NEXT: s_add_i32 s4, s4, s7 ; GFX10-NEXT: s_add_i32 s4, s4, s8 ; GFX10-NEXT: s_add_i32 s4, s4, s9 ; GFX10-NEXT: s_add_i32 s4, s4, s10 ; GFX10-NEXT: s_add_i32 s4, s4, s11 ; GFX10-NEXT: s_add_i32 s4, s4, s12 ; GFX10-NEXT: s_add_i32 s4, s4, s13 ; GFX10-NEXT: s_add_i32 s4, s4, s14 ; GFX10-NEXT: s_add_i32 s4, s4, s15 ; GFX10-NEXT: s_add_i32 s4, s4, s17 ; GFX10-NEXT: s_add_i32 s4, s4, s16 ; GFX10-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-NEXT: flat_store_dword v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: fcmp_f64_uniform: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-NEXT: s_wait_expcnt 0x0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_cmp_eq_f64_e64 s4, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_gt_f64_e64 s5, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_ge_f64_e64 s6, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_lt_f64_e64 s7, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_le_f64_e64 s8, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_lg_f64_e64 s9, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_o_f64_e64 s10, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_nlg_f64_e64 s11, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_nle_f64_e64 s12, s[0:1], s[2:3] 
; GFX12-NEXT: v_cmp_nlt_f64_e64 s13, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_nge_f64_e64 s14, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_ngt_f64_e64 s15, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_neq_f64_e64 s16, s[0:1], s[2:3] ; GFX12-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[2:3] ; GFX12-NEXT: s_cmp_lg_u32 s4, 0 ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s6, 0 ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s7, 0 ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s8, 0 ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s9, 0 ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s10, 0 ; GFX12-NEXT: s_cselect_b32 s7, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s11, 0 ; GFX12-NEXT: s_cselect_b32 s8, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s12, 0 ; GFX12-NEXT: s_cselect_b32 s9, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s13, 0 ; GFX12-NEXT: s_cselect_b32 s10, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s14, 0 ; GFX12-NEXT: s_cselect_b32 s11, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s15, 0 ; GFX12-NEXT: s_cselect_b32 s12, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s16, 0 ; GFX12-NEXT: s_cselect_b32 s13, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_cmp_lg_u32 s4, 0 ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s1, 0 ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s2, 0 ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s3, 0 ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s6, 0 ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s7, 0 ; GFX12-NEXT: s_cselect_b32 s7, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s8, 0 ; GFX12-NEXT: s_cselect_b32 s8, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s9, 0 ; GFX12-NEXT: s_cselect_b32 s9, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s10, 0 ; GFX12-NEXT: s_cselect_b32 s10, 1, 0 ; 
GFX12-NEXT: s_cmp_lg_u32 s11, 0 ; GFX12-NEXT: s_cselect_b32 s11, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s12, 0 ; GFX12-NEXT: s_cselect_b32 s12, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s13, 0 ; GFX12-NEXT: s_cselect_b32 s13, 1, 0 ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s4, s1 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s2 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s3 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s5 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s6 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s7 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s8 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s9 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s10 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s11 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s12 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s1, s1, s13 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_add_co_i32 s0, s1, s0 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: v_mov_b32_e32 v2, s0 ; GFX12-NEXT: flat_store_b32 v[0:1], v2 ; GFX12-NEXT: s_wait_dscnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] %oeq_result = fcmp oeq double %a, %b %ogt_result = fcmp ogt double %a, %b %oge_result = fcmp oge double %a, %b %olt_result = fcmp olt double %a, %b %ole_result = fcmp ole double %a, %b %one_result = fcmp one double %a, %b %ord_result = fcmp ord double %a, %b %ueq_result = fcmp ueq double %a, %b %ugt_result = fcmp ugt double %a, %b %uge_result = fcmp uge double %a, %b %ult_result = fcmp ult double %a, %b %ule_result = fcmp ule double %a, %b %une_result = fcmp une double 
%a, %b %uno_result = fcmp uno double %a, %b %oeq_zext = zext i1 %oeq_result to i32 %ogt_zext = zext i1 %ogt_result to i32 %oge_zext = zext i1 %oge_result to i32 %olt_zext = zext i1 %olt_result to i32 %ole_zext = zext i1 %ole_result to i32 %one_zext = zext i1 %one_result to i32 %ord_zext = zext i1 %ord_result to i32 %ueq_zext = zext i1 %ueq_result to i32 %ugt_zext = zext i1 %ugt_result to i32 %uge_zext = zext i1 %uge_result to i32 %ult_zext = zext i1 %ult_result to i32 %ule_zext = zext i1 %ule_result to i32 %une_zext = zext i1 %une_result to i32 %uno_zext = zext i1 %uno_result to i32 %sum1 = add i32 %oeq_zext, %ogt_zext %sum2 = add i32 %sum1, %oge_zext %sum3 = add i32 %sum2, %olt_zext %sum4 = add i32 %sum3, %ole_zext %sum5 = add i32 %sum4, %one_zext %sum6 = add i32 %sum5, %ord_zext %sum7 = add i32 %sum6, %ueq_zext %sum8 = add i32 %sum7, %ugt_zext %sum9 = add i32 %sum8, %uge_zext %sum10 = add i32 %sum9, %ult_zext %sum11 = add i32 %sum10, %ule_zext %sum12 = add i32 %sum11, %une_zext %result = add i32 %sum12, %uno_zext store i32 %result, ptr %p ret void }

; f64 fcmp with operands that are not marked inreg.
;
; Compares %a against %b with fourteen fcmp predicates (every predicate except
; the constant false/true ones), zero-extends each i1 result to i32, adds the
; fourteen values together and stores the sum through %p.
;
; The expected output on both targets reads the operands from v[0:1] and
; v[2:3], writes each compare result to vcc_lo, and turns it into 0/1 with
; v_cndmask_b32 before accumulating with v_add_nc_u32 / v_add3_u32.
;
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define void @fcmp_f64_divergent(double %a, double %b, ptr %p) {
; GFX10-LABEL: fcmp_f64_divergent:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_ge_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v7
; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_le_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_add3_u32 v6, v6, v8, v9
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_lg_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_add3_u32 v6, v6, v7, v10
; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_nlg_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_nle_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_add3_u32 v6, v6, v8, v9
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_nlt_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_nge_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_add3_u32 v6, v6, v7, v10
; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_ngt_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_neq_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT: v_add3_u32 v1, v6, v8, v9
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-NEXT: v_add3_u32 v0, v1, v7, v0
; GFX10-NEXT: flat_store_dword v[4:5], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: fcmp_f64_divergent:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_eq_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_nc_u32_e32 v6, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v6, v6, v8, v9
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lg_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v6, v6, v7, v10
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_nlg_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_nle_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v6, v6, v8, v9
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_nlt_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_nge_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v6, v6, v7, v10
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ngt_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_neq_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: v_add3_u32 v1, v6, v8, v9
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add3_u32 v0, v1, v7, v0
; GFX12-NEXT: flat_store_b32 v[4:5], v0
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  ; One compare per predicate: six ordered relations plus ord, then six
  ; unordered relations plus uno.
  %oeq_result = fcmp oeq double %a, %b
  %ogt_result = fcmp ogt double %a, %b
  %oge_result = fcmp oge double %a, %b
  %olt_result = fcmp olt double %a, %b
  %ole_result = fcmp ole double %a, %b
  %one_result = fcmp one double %a, %b
  %ord_result = fcmp ord double %a, %b
  %ueq_result = fcmp ueq double %a, %b
  %ugt_result = fcmp ugt double %a, %b
  %uge_result = fcmp uge double %a, %b
  %ult_result = fcmp ult double %a, %b
  %ule_result = fcmp ule double %a, %b
  %une_result = fcmp une double %a, %b
  %uno_result = fcmp uno double %a, %b
  ; Widen every i1 compare result to a 0/1 i32 so it can be summed.
  %oeq_zext = zext i1 %oeq_result to i32
  %ogt_zext = zext i1 %ogt_result to i32
  %oge_zext = zext i1 %oge_result to i32
  %olt_zext = zext i1 %olt_result to i32
  %ole_zext = zext i1 %ole_result to i32
  %one_zext = zext i1 %one_result to i32
  %ord_zext = zext i1 %ord_result to i32
  %ueq_zext = zext i1 %ueq_result to i32
  %ugt_zext = zext i1 %ugt_result to i32
  %uge_zext = zext i1 %uge_result to i32
  %ult_zext = zext i1 %ult_result to i32
  %ule_zext = zext i1 %ule_result to i32
  %une_zext = zext i1 %une_result to i32
  %uno_zext = zext i1 %uno_result to i32
  ; Chain the adds so that every compare result feeds the stored value and
  ; none of the fourteen compares is unused.
  %sum1 = add i32 %oeq_zext, %ogt_zext
  %sum2 = add i32 %sum1, %oge_zext
  %sum3 = add i32 %sum2, %olt_zext
  %sum4 = add i32 %sum3, %ole_zext
  %sum5 = add i32 %sum4, %one_zext
  %sum6 = add i32 %sum5, %ord_zext
  %sum7 = add i32 %sum6, %ueq_zext
  %sum8 = add i32 %sum7, %ugt_zext
  %sum9 = add i32 %sum8, %uge_zext
  %sum10 = add i32 %sum9, %ult_zext
  %sum11 = add i32 %sum10, %ule_zext
  %sum12 = add i32 %sum11, %une_zext
  %result = add i32 %sum12, %uno_zext
  store i32 %result, ptr %p
  ret void
}