; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX1010 %s ; Test the CMP+SELECT optimization that folds shared constants to reduce ; register pressure. ;------------------------------------------------------------------------------ ; F32 Tests ;------------------------------------------------------------------------------ ; Should be folded: fcmp oeq + select with constant in true value define float @fcmp_select_fold_oeq_f32_imm(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_fold_oeq_f32_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_oeq_f32_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq float %arg, 0x40490FDB00000000 %sel = select i1 %cmp, float 0x40490FDB00000000, float %other ret float %sel } ; Should be folded: fcmp oeq + select with constant in true value (commutative) define float @fcmp_select_fold_oeq_imm_f32(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_fold_oeq_imm_f32: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f32: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0 ; 
GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq float 0x40490FDB00000000, %arg %sel = select i1 %cmp, float 0x40490FDB00000000, float %other ret float %sel } ; Should be folded: fcmp one + select with constant in false value define float @fcmp_select_fold_one_f32_imm(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_fold_one_f32_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x402df850 ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_one_f32_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one float %arg, 0x4005BF0A00000000 %sel = select i1 %cmp, float %other, float 0x4005BF0A00000000 ret float %sel } ; Should be folded: fcmp one + select with constant in false value (commutative) define float @fcmp_select_fold_one_imm_f32(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_fold_one_imm_f32: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x402df850 ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_one_imm_f32: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one float 0x4005BF0A00000000, %arg %sel = select i1 %cmp, float %other, float 0x4005BF0A00000000 ret float %sel } ; Should NOT be folded: different constants define float 
@fcmp_select_no_fold_f32_different_const(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_f32_different_const: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x46487ed8 ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f32_different_const: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42487ed8, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x46487ed8, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq float %arg, 0x40490FDB00000000 %sel = select i1 %cmp, float 0x40C90FDB00000000, float %other ret float %sel } ; Should NOT be folded: fcmp oeq with constant in other position define float @fcmp_select_no_fold_f32_other_pos(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_f32_other_pos: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8 ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f32_other_pos: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq float %arg, 0x40490FDB00000000 %sel = select i1 %cmp, float %other, float 0x40490FDB00000000 ret float %sel } ; Should NOT be folded: unsupported comparison type define float @fcmp_select_no_fold_f32_unsupported_cmp(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_f32_unsupported_cmp: ; GFX900: ; %bb.0: ; %entry ; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8 ; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f32_unsupported_cmp: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42487ed8, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp olt float %arg, 0x40490FDB00000000 %sel = select i1 %cmp, float %other, float 0x40490FDB00000000 ret float %sel } ; Should NOT be folded: imm can be encoded into cndmask define float @fcmp_select_no_fold_f32_enc_imm(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 1.0, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 1.0, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq float %arg, 1.0 %sel = select i1 %cmp, float 1.0, float %other ret float %sel } ; Should NOT be folded: imm can be encoded into cndmask define float @fcmp_select_no_fold_f32_enc_imm_2(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm_2: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, -4.0, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm_2: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, -4.0, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one float -4.0, %arg %sel = select i1 %cmp, float %other, float -4.0 ret float %sel } ; Should NOT be folded: fcmp oeq with zero constant define float @fcmp_select_no_fold_oeq_f32_zero(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_zero: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_zero: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq float %arg, 0.0 %sel = select i1 %cmp, float 0.0, float %other ret float %sel } ; Should NOT be folded: fcmp one with negative zero constant define float @fcmp_select_no_fold_one_f32_negzero(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_one_f32_negzero: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_brev_b32 s4, 1 ; GFX900-NEXT: v_bfrev_b32_e32 v2, 1 ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_one_f32_negzero: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x80000000, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one float -0.0, %arg ; 0x8000000000000000 %sel = select i1 %cmp, float %other, float -0.0 ;0x8000000000000000 ret float %sel } ; NaN values should bypass the optimization due to special 
IEEE 754 behavior ; fcmp oeq with NaN always returns false, so select always chooses %other define float @fcmp_select_no_fold_oeq_f32_nan(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_nan: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v0, v1 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_nan: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v0, v1 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq float %arg, 0x7FF8000000000000 %sel = select i1 %cmp, float 0x7FF8000000000000, float %other ret float %sel } ; NaN values should bypass the optimization due to special IEEE 754 behavior ; fcmp one with NaN always returns false, so select always chooses the NaN constant define float @fcmp_select_no_fold_one_f32_nan(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_one_f32_nan: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc00000 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_one_f32_nan: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc00000 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one float 0x7FF8000000000000, %arg %sel = select i1 %cmp, float %other, float 0x7FF8000000000000 ret float %sel } ; Should NOT be folded: fcmp oeq with positive infinity ; Infinity values should bypass the optimization, generating unfolded code define float @fcmp_select_no_fold_posinf_oeq_f32(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_posinf_oeq_f32: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x7f800000 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0 ; GFX900-NEXT: 
v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_posinf_oeq_f32: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x7f800000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq float %arg, 0x7FF0000000000000 %sel = select i1 %cmp, float 0x7FF0000000000000, float %other ret float %sel } ; Should NOT be folded: fcmp one with negative infinity ; Infinity values should bypass the optimization, generating unfolded code define float @fcmp_select_no_fold_neginf_f32_one(float %arg, float %other) { ; GFX900-LABEL: fcmp_select_no_fold_neginf_f32_one: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0xff800000 ; GFX900-NEXT: v_mov_b32_e32 v2, 0xff800000 ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_neginf_f32_one: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xff800000, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one float 0xFFF0000000000000, %arg %sel = select i1 %cmp, float %other, float 0xFFF0000000000000 ret float %sel } ;------------------------------------------------------------------------------ ; F64 Tests ;------------------------------------------------------------------------------ ; Should be folded: f64 fcmp oeq + select with constant in true value define double @fcmp_select_fold_oeq_f64_imm(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_fold_oeq_f64_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX900-NEXT: s_mov_b32 s5, 
0x400921fb ; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_oeq_f64_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb ; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double %arg, 3.141592653589793 %sel = select i1 %cmp, double 3.141592653589793, double %other ret double %sel } ; Should be folded: f64 fcmp oeq + select with constant in true value (commutative) define double @fcmp_select_fold_oeq_imm_f64(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_fold_oeq_imm_f64: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX900-NEXT: s_mov_b32 s5, 0x400921fb ; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f64: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb ; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double 3.141592653589793, %arg %sel = select i1 %cmp, double 3.141592653589793, double %other ret double %sel } ; Should be folded: f64 fcmp one + select with constant in false value define double @fcmp_select_fold_one_f64_imm(double %arg, double %other) { ; 
GFX900-LABEL: fcmp_select_fold_one_f64_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x8b145769 ; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a ; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_one_f64_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769 ; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a ; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one double %arg, 2.718281828459045 %sel = select i1 %cmp, double %other, double 2.718281828459045 ret double %sel } ; Should be folded: f64 fcmp one + select with constant in false value (commutative) define double @fcmp_select_fold_one_imm_f64(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_fold_one_imm_f64: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x8b145769 ; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a ; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_one_imm_f64: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769 ; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a ; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one double 2.718281828459045, %arg %sel = select i1 %cmp, double 
%other, double 2.718281828459045 ret double %sel } ; Should NOT be folded: f64 fcmp oeq with constant in other position define double @fcmp_select_no_fold_f64_other_pos(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_f64_other_pos: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX900-NEXT: s_mov_b32 s5, 0x400921fb ; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb ; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f64_other_pos: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb ; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double %arg, 3.141592653589793 %sel = select i1 %cmp, double %other, double 3.141592653589793 ret double %sel } ; Should NOT be folded: f64 fcmp unsupported comparison type define double @fcmp_select_no_fold_f64_unsupported_cmp(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_f64_unsupported_cmp: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX900-NEXT: s_mov_b32 s5, 0x400921fb ; GFX900-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb ; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f64_unsupported_cmp: ; GFX1010: ; %bb.0: ; %entry ; 
GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb ; GFX1010-NEXT: v_cmp_gt_f64_e32 vcc_lo, s[4:5], v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp olt double %arg, 3.141592653589793 %sel = select i1 %cmp, double %other, double 3.141592653589793 ret double %sel } ; Should NOT be folded: imm can be encoded into cndmask define double @fcmp_select_no_fold_f64_enc_imm(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 1.0, v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x3ff00000 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x3ff00000, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double %arg, 1.0 %sel = select i1 %cmp, double 1.0, double %other ret double %sel } ; Should NOT be folded: imm can be encoded into cndmask define double @fcmp_select_no_fold_f64_enc_imm_2(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm_2: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, -4.0, v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v1, 0xc0100000 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm_2: ; GFX1010: ; %bb.0: ; 
%entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, -4.0, v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xc0100000, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one double -4.0, %arg %sel = select i1 %cmp, double %other, double -4.0 ret double %sel } ; Should NOT be folded: f64 fcmp oeq with zero constant define double @fcmp_select_no_fold_oeq_f64_zero(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_oeq_f64_zero: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 0, v[0:1] ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_oeq_f64_zero: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0, v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double %arg, 0.0 %sel = select i1 %cmp, double 0.0, double %other ret double %sel } ; Should NOT be folded: f64 fcmp one with negative zero constant define double @fcmp_select_no_fold_one_f64_negzero(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_one_f64_negzero: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0 ; GFX900-NEXT: s_brev_b32 s5, 1 ; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_one_f64_negzero: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: 
v_cmp_lg_f64_e32 vcc_lo, 0x80000000, v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x80000000, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one double -0.0, %arg %sel = select i1 %cmp, double %other, double -0.0 ret double %sel } ; Should NOT be folded: f64 different constants define double @fcmp_select_no_fold_f64_different_const(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_f64_different_const: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX900-NEXT: s_mov_b32 s5, 0x400921fb ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v4, 0x8b145769 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x4005bf0a ; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f64_different_const: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 ; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, s[4:5], v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8b145769, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x4005bf0a, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double %arg, 3.141592653589793 %sel = select i1 %cmp, double 2.718281828459045, double %other ret double %sel } ; Should NOT be folded: f64 fcmp oeq with NaN constant ; fcmp oeq with NaN always returns false, so select always chooses %other define double @fcmp_select_no_fold_nan_f64(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_nan_f64: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v1, v3 ; GFX900-NEXT: v_mov_b32_e32 v0, v2 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_nan_f64: ; 
GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v1, v3 ; GFX1010-NEXT: v_mov_b32_e32 v0, v2 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double %arg, 0x7FF8000000000000 %sel = select i1 %cmp, double 0x7FF8000000000000, double %other ret double %sel } ; Should NOT be folded: f64 fcmp oeq with NaN constant (commutative variant) ; fcmp oeq with NaN always returns false, so select always chooses %other define double @fcmp_select_no_fold_nan_f64_comm(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_nan_f64_comm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v1, v3 ; GFX900-NEXT: v_mov_b32_e32 v0, v2 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_comm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v1, v3 ; GFX1010-NEXT: v_mov_b32_e32 v0, v2 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double 0x7FF8000000000000, %arg %sel = select i1 %cmp, double 0x7FF8000000000000, double %other ret double %sel } ; Should NOT be folded: f64 fcmp one with NaN constant ; fcmp one with NaN always returns false, so select always chooses the NaN constant define double @fcmp_select_no_fold_nan_f64_one(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v0, 0 ; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one double %arg, 0x7FF8000000000000 %sel = select i1 %cmp, double %other, 
double 0x7FF8000000000000 ret double %sel } ; Should NOT be folded: f64 fcmp one with NaN constant (commutative variant) ; fcmp one with NaN always returns false, so select always chooses the NaN constant define double @fcmp_select_no_fold_nan_f64_one_comm(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one_comm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one_comm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v0, 0 ; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one double 0x7FF8000000000000, %arg %sel = select i1 %cmp, double %other, double 0x7FF8000000000000 ret double %sel } ; Should NOT be folded: f64 fcmp oeq with positive infinity ; Infinity values should bypass the optimization, generating unfolded code define double @fcmp_select_no_fold_posinf_f64(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_posinf_f64: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0 ; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000 ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double %arg, 0x7FF0000000000000 %sel = select i1 
%cmp, double 0x7FF0000000000000, double %other ret double %sel } ; Should NOT be folded: f64 fcmp oeq with negative infinity ; Infinity values should bypass the optimization, generating unfolded code define double @fcmp_select_no_fold_neginf_f64(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_neginf_f64: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0 ; GFX900-NEXT: s_mov_b32 s5, 0xfff00000 ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double %arg, 0xFFF0000000000000 %sel = select i1 %cmp, double 0xFFF0000000000000, double %other ret double %sel } ; Should NOT be folded: f64 fcmp oeq with positive infinity (commutative variant) ; Infinity values should bypass the optimization, generating unfolded code define double @fcmp_select_no_fold_posinf_f64_comm(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_posinf_f64_comm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0 ; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000 ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64_comm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double 0x7FF0000000000000, %arg %sel = select i1 %cmp, double 0x7FF0000000000000, double %other ret double %sel } ; Should NOT be folded: f64 fcmp oeq with negative infinity (commutative variant) ; Infinity values should bypass the optimization, generating unfolded code define double @fcmp_select_no_fold_neginf_f64_comm(double %arg, double %other) { ; GFX900-LABEL: fcmp_select_no_fold_neginf_f64_comm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0 ; GFX900-NEXT: s_mov_b32 s5, 0xfff00000 ; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64_comm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1] ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq double 0xFFF0000000000000, %arg %sel = select i1 %cmp, double 0xFFF0000000000000, double %other ret double %sel } ;------------------------------------------------------------------------------ ; F16 Tests ;------------------------------------------------------------------------------ ; Should be folded: f16 fcmp oeq + select with constant in true value define half @fcmp_select_fold_oeq_f16_imm(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_fold_oeq_f16_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_movk_i32 s4, 0x4248 
; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_oeq_f16_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq half %arg, 0xH4248 %sel = select i1 %cmp, half 0xH4248, half %other ret half %sel } ; Should be folded: f16 fcmp oeq + select with constant in true value (commutative) define half @fcmp_select_fold_oeq_imm_f16(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_fold_oeq_imm_f16: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_movk_i32 s4, 0x4248 ; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f16: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq half 0xH4248, %arg %sel = select i1 %cmp, half 0xH4248, half %other ret half %sel } ; Should be folded: f16 fcmp one + select with constant in false value define half @fcmp_select_fold_one_f16_imm(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_fold_one_f16_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_movk_i32 s4, 0x4020 ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_one_f16_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 
v0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one half %arg, 0xH4020 %sel = select i1 %cmp, half %other, half 0xH4020 ret half %sel } ; Should be folded: f16 fcmp one + select with constant in false value (commutative) define half @fcmp_select_fold_one_imm_f16(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_fold_one_imm_f16: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_movk_i32 s4, 0x4020 ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_one_imm_f16: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one half 0xH4020, %arg %sel = select i1 %cmp, half %other, half 0xH4020 ret half %sel } ; Should NOT be folded: f16 different constants define half @fcmp_select_no_fold_f16_different_const(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_no_fold_f16_different_const: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_movk_i32 s4, 0x4248 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300 ; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f16_different_const: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0x4248, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq half %arg, 0xH4248 %sel = select i1 %cmp, half 0xH4300, half %other ret half %sel } ; Should NOT be folded: f16 fcmp oeq with NaN constant ; fcmp oeq with NaN always returns false, so select always chooses %other define half @fcmp_select_no_fold_nan_f16(half %arg, half %other) { ; GFX900-LABEL: 
fcmp_select_no_fold_nan_f16: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v0, v1 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_nan_f16: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v0, v1 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq half %arg, 0xH7e00 %sel = select i1 %cmp, half 0xH7e00, half %other ret half %sel } ; Should NOT be folded: f16 fcmp one with NaN constant define half @fcmp_select_no_fold_nan_f16_one(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_no_fold_nan_f16_one: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v0, 0x7e00 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_nan_f16_one: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7e00 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one half %arg, 0xH7e00 %sel = select i1 %cmp, half %other, half 0xH7e00 ret half %sel } ; Should NOT be folded: f16 fcmp one with +Inf constant define half @fcmp_select_no_fold_posinf_f16_one(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_no_fold_posinf_f16_one: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_movk_i32 s4, 0x7c00 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7c00 ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_posinf_f16_one: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x7c00, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one half %arg, 0xH7c00 %sel = select i1 %cmp, half %other, half 
0xH7c00 ret half %sel } ; Should NOT be folded: f16 fcmp one with -Inf constant define half @fcmp_select_no_fold_neginf_f16_one(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_no_fold_neginf_f16_one: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0xfc00 ; GFX900-NEXT: v_mov_b32_e32 v2, 0xfc00 ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_neginf_f16_one: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0xfc00, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xfc00, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one half %arg, 0xHfc00 %sel = select i1 %cmp, half %other, half 0xHfc00 ret half %sel } ; Should NOT be folded: f16 fcmp oeq with zero constant define half @fcmp_select_no_fold_oeq_f16_zero(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_no_fold_oeq_f16_zero: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, 0, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_oeq_f16_zero: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq half %arg, 0xH0000 %sel = select i1 %cmp, half 0xH0000, half %other ret half %sel } ; Should NOT be folded: f16 fcmp one with negative zero constant define half @fcmp_select_no_fold_one_f16_negzero(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_no_fold_one_f16_negzero: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, 0x8000 ; GFX900-NEXT: v_mov_b32_e32 
v2, 0x8000 ; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_one_f16_negzero: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x8000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8000, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one half 0xH8000, %arg %sel = select i1 %cmp, half %other, half 0xH8000 ret half %sel } ; Should NOT be folded: f16 fcmp oeq with the constant in the false value (equality is only known when the true value is selected) define half @fcmp_select_no_fold_f16_other_pos(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_no_fold_f16_other_pos: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_movk_i32 s4, 0x4248 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248 ; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f16_other_pos: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq half %arg, 0xH4248 %sel = select i1 %cmp, half %other, half 0xH4248 ret half %sel } ; Should NOT be folded: f16 unsupported comparison predicate (olt) define half @fcmp_select_no_fold_f16_unsupported_cmp(half %arg, half %other) { ; GFX900-LABEL: fcmp_select_no_fold_f16_unsupported_cmp: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_movk_i32 s4, 0x4248 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248 ; GFX900-NEXT: v_cmp_gt_f16_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_f16_unsupported_cmp: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0x4248, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp olt half %arg, 0xH4248 %sel = select i1 %cmp, half %other, half 0xH4248 ret half %sel } ;------------------------------------------------------------------------------ ; BF16 Tests ;------------------------------------------------------------------------------ ; Should be folded: bfloat fcmp oeq + select with constant in true value define bfloat @fcmp_select_fold_oeq_bf16_imm(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_fold_oeq_bf16_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x42480000 ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_oeq_bf16_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0 ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq bfloat %arg, 0xR4248 %sel = select i1 %cmp, bfloat 0xR4248, bfloat %other ret bfloat %sel } ; Should be folded: bfloat fcmp oeq + select with constant in true value (commutative) define bfloat @fcmp_select_fold_oeq_imm_bf16(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_fold_oeq_imm_bf16: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x42480000 ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_oeq_imm_bf16: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0 ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq bfloat 0xR4248, %arg %sel = select i1 %cmp, bfloat 0xR4248, bfloat %other ret bfloat %sel } ; Should be folded: bfloat fcmp one + select with constant in false value define bfloat @fcmp_select_fold_one_bf16_imm(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_fold_one_bf16_imm: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x40200000 ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_one_bf16_imm: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0 ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one bfloat %arg, 0xR4020 %sel = select i1 %cmp, bfloat %other, bfloat 0xR4020 ret bfloat %sel } ; Should be folded: bfloat fcmp one + select with constant in false value (commutative) define bfloat @fcmp_select_fold_one_imm_bf16(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_fold_one_imm_bf16: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x40200000 ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_fold_one_imm_bf16: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0 ; GFX1010-NEXT: v_cmp_lg_f32_e32 
vcc_lo, 0x40200000, v2 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one bfloat 0xR4020, %arg %sel = select i1 %cmp, bfloat %other, bfloat 0xR4020 ret bfloat %sel } ; Should NOT be folded: bfloat different constants define bfloat @fcmp_select_no_fold_bf16_different_const(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_no_fold_bf16_different_const: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x42480000 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300 ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_bf16_different_const: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42480000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq bfloat %arg, 0xR4248 %sel = select i1 %cmp, bfloat 0xR4300, bfloat %other ret bfloat %sel } ; Should NOT be folded: bfloat fcmp oeq with NaN constant ; fcmp oeq with NaN always returns false, so select always chooses %other define bfloat @fcmp_select_no_fold_nan_bf16(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_no_fold_nan_bf16: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v0, v1 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v0, v1 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq bfloat %arg, 0xR7FC0 %sel = select i1 %cmp, bfloat 0xR7FC0, bfloat %other ret bfloat %sel } ; Should NOT be folded: bfloat fcmp one with NaN constant define bfloat @fcmp_select_no_fold_nan_bf16_one(bfloat %arg, bfloat %other) { 
; GFX900-LABEL: fcmp_select_no_fold_nan_bf16_one: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16_one: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc0 ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one bfloat %arg, 0xR7FC0 %sel = select i1 %cmp, bfloat %other, bfloat 0xR7FC0 ret bfloat %sel } ; Should NOT be folded: bfloat fcmp one with +Inf constant define bfloat @fcmp_select_no_fold_posinf_bf16_one(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_no_fold_posinf_bf16_one: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x7f800000 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f80 ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_posinf_bf16_one: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x7f800000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f80, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one bfloat %arg, 0xR7F80 %sel = select i1 %cmp, bfloat %other, bfloat 0xR7F80 ret bfloat %sel } ; Should NOT be folded: bfloat fcmp one with -Inf constant define bfloat @fcmp_select_no_fold_neginf_bf16_one(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_no_fold_neginf_bf16_one: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0xff800000 ; GFX900-NEXT: v_mov_b32_e32 v2, 0xffffff80 ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 ; GFX900-NEXT: 
v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_neginf_bf16_one: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffffff80, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one bfloat %arg, 0xRFF80 %sel = select i1 %cmp, bfloat %other, bfloat 0xRFF80 ret bfloat %sel } ; Should NOT be folded: bfloat fcmp oeq with zero constant define bfloat @fcmp_select_no_fold_oeq_bf16_zero(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_no_fold_oeq_bf16_zero: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_oeq_bf16_zero: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq bfloat %arg, 0xR0000 %sel = select i1 %cmp, bfloat 0xR0000, bfloat %other ret bfloat %sel } ; Should NOT be folded: bfloat fcmp one with negative zero constant define bfloat @fcmp_select_no_fold_one_bf16_negzero(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_no_fold_one_bf16_negzero: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_brev_b32 s4, 1 ; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff8000 ; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_one_bf16_negzero: ; 
GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffff8000, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp one bfloat 0xR8000, %arg %sel = select i1 %cmp, bfloat %other, bfloat 0xR8000 ret bfloat %sel } ; Should NOT be folded: bfloat fcmp oeq with the constant in the false value (equality is only known when the true value is selected) define bfloat @fcmp_select_no_fold_bf16_other_pos(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_no_fold_bf16_other_pos: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x42480000 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248 ; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_bf16_other_pos: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp oeq bfloat %arg, 0xR4248 %sel = select i1 %cmp, bfloat %other, bfloat 0xR4248 ret bfloat %sel } ; Should NOT be folded: bfloat unsupported comparison predicate (olt) define bfloat @fcmp_select_no_fold_bf16_unsupported_cmp(bfloat %arg, bfloat %other) { ; GFX900-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x42480000 ; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248 ; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp: ; GFX1010: ; 
%bb.0: ; %entry ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42480000, v0 ; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo ; GFX1010-NEXT: s_setpc_b64 s[30:31] entry: %cmp = fcmp olt bfloat %arg, 0xR4248 %sel = select i1 %cmp, bfloat %other, bfloat 0xR4248 ret bfloat %sel }