diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll | 1429 |
1 files changed, 1429 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll b/llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll new file mode 100644 index 0000000..11af704 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll @@ -0,0 +1,1429 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX1010 %s + +; Test the CMP+SELECT optimization that folds shared constants to reduce +; register pressure. + +;------------------------------------------------------------------------------ +; F32 Tests +;------------------------------------------------------------------------------ + +; Should be folded: fcmp oeq + select with constant in true value +define float @fcmp_select_fold_oeq_f32_imm(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_fold_oeq_f32_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 +; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_oeq_f32_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq float %arg, 0x40490FDB00000000 + %sel = select i1 %cmp, float 0x40490FDB00000000, float %other + ret float %sel +} + +; Should be folded: fcmp oeq + select with constant in true value (commutative) +define float @fcmp_select_fold_oeq_imm_f32(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_fold_oeq_imm_f32: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 +; 
GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f32: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq float 0x40490FDB00000000, %arg + %sel = select i1 %cmp, float 0x40490FDB00000000, float %other + ret float %sel +} + +; Should be folded: fcmp one + select with constant in false value +define float @fcmp_select_fold_one_f32_imm(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_fold_one_f32_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x402df850 +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_one_f32_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one float %arg, 0x4005BF0A00000000 + %sel = select i1 %cmp, float %other, float 0x4005BF0A00000000 + ret float %sel +} + +; Should be folded: fcmp one + select with constant in false value (commutative) +define float @fcmp_select_fold_one_imm_f32(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_fold_one_imm_f32: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x402df850 +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_one_imm_f32: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one float 0x4005BF0A00000000, %arg + %sel = select i1 %cmp, float %other, float 0x4005BF0A00000000 + ret float %sel +} + +; Should NOT be folded: different constants +define float @fcmp_select_no_fold_f32_different_const(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_f32_different_const: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x46487ed8 +; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f32_different_const: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42487ed8, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x46487ed8, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq float %arg, 0x40490FDB00000000 + %sel = select i1 %cmp, float 0x40C90FDB00000000, float %other + ret float %sel +} + +; Should NOT be folded: fcmp oeq with constant in other position +define float @fcmp_select_no_fold_f32_other_pos(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_f32_other_pos: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8 +; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f32_other_pos: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0 +; GFX1010-NEXT: 
v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq float %arg, 0x40490FDB00000000 + %sel = select i1 %cmp, float %other, float 0x40490FDB00000000 + ret float %sel +} + +; Should NOT be folded: unsupported comparison type +define float @fcmp_select_no_fold_f32_unsupported_cmp(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_f32_unsupported_cmp: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8 +; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f32_unsupported_cmp: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42487ed8, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp olt float %arg, 0x40490FDB00000000 + %sel = select i1 %cmp, float %other, float 0x40490FDB00000000 + ret float %sel +} + +; Should NOT be folded: imm can be encoded into cndmask +define float @fcmp_select_no_fold_f32_enc_imm(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 1.0, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 1.0, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq float %arg, 1.0 + %sel = select i1 %cmp, float 1.0, float %other + ret float %sel +} + +; Should NOT be folded: 
imm can be encoded into cndmask +define float @fcmp_select_no_fold_f32_enc_imm_2(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm_2: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, -4.0, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm_2: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, -4.0, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one float -4.0, %arg + %sel = select i1 %cmp, float %other, float -4.0 + ret float %sel +} + +; Should NOT be folded: fcmp oeq with zero constant +define float @fcmp_select_no_fold_oeq_f32_zero(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_zero: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_zero: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq float %arg, 0.0 + %sel = select i1 %cmp, float 0.0, float %other + ret float %sel +} + +; Should NOT be folded: fcmp one with negative zero constant +define float @fcmp_select_no_fold_one_f32_negzero(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_one_f32_negzero: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_brev_b32 s4, 1 +; GFX900-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 +; GFX900-NEXT: 
v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_one_f32_negzero: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x80000000, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one float -0.0, %arg ; 0x8000000000000000 + %sel = select i1 %cmp, float %other, float -0.0 ;0x8000000000000000 + ret float %sel +} + +; NaN values should bypass the optimization due to special IEEE 754 behavior +; fcmp oeq with NaN always returns false, so select always chooses %other +define float @fcmp_select_no_fold_oeq_f32_nan(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_nan: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_nan: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_mov_b32_e32 v0, v1 +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq float %arg, 0x7FF8000000000000 + %sel = select i1 %cmp, float 0x7FF8000000000000, float %other + ret float %sel +} + +; NaN values should bypass the optimization due to special IEEE 754 behavior +; fcmp one with NaN always returns false, so select always chooses the NaN constant +define float @fcmp_select_no_fold_one_f32_nan(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_one_f32_nan: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc00000 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_one_f32_nan: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc00000 +; GFX1010-NEXT: s_setpc_b64 
s[30:31] +entry: + %cmp = fcmp one float 0x7FF8000000000000, %arg + %sel = select i1 %cmp, float %other, float 0x7FF8000000000000 + ret float %sel +} + +; Should NOT be folded: fcmp oeq with positive infinity +; Infinity values should bypass the optimization, generating unfolded code +define float @fcmp_select_no_fold_posinf_oeq_f32(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_posinf_oeq_f32: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_posinf_oeq_f32: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x7f800000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq float %arg, 0x7FF0000000000000 + %sel = select i1 %cmp, float 0x7FF0000000000000, float %other + ret float %sel +} + +; Should NOT be folded: fcmp one with negative infinity +; Infinity values should bypass the optimization, generating unfolded code +define float @fcmp_select_no_fold_neginf_f32_one(float %arg, float %other) { +; GFX900-LABEL: fcmp_select_no_fold_neginf_f32_one: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0xff800000 +; GFX900-NEXT: v_mov_b32_e32 v2, 0xff800000 +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_neginf_f32_one: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xff800000, v1, vcc_lo +; 
GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one float 0xFFF0000000000000, %arg + %sel = select i1 %cmp, float %other, float 0xFFF0000000000000 + ret float %sel +} + +;------------------------------------------------------------------------------ +; F64 Tests +;------------------------------------------------------------------------------ + +; Should be folded: f64 fcmp oeq + select with constant in true value +define double @fcmp_select_fold_oeq_f64_imm(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_fold_oeq_f64_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX900-NEXT: s_mov_b32 s5, 0x400921fb +; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_oeq_f64_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb +; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double %arg, 3.141592653589793 + %sel = select i1 %cmp, double 3.141592653589793, double %other + ret double %sel +} +; Should be folded: f64 fcmp oeq + select with constant in true value (commutative) +define double @fcmp_select_fold_oeq_imm_f64(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_fold_oeq_imm_f64: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX900-NEXT: s_mov_b32 s5, 0x400921fb +; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 
+; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f64: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb +; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double 3.141592653589793, %arg + %sel = select i1 %cmp, double 3.141592653589793, double %other + ret double %sel +} + +; Should be folded: f64 fcmp one + select with constant in false value +define double @fcmp_select_fold_one_f64_imm(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_fold_one_f64_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x8b145769 +; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a +; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_one_f64_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769 +; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a +; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one double %arg, 2.718281828459045 + %sel = select i1 %cmp, double %other, double 2.718281828459045 + ret double %sel +} +; Should be folded: f64 fcmp one + select with constant in false value (commutative) +define double @fcmp_select_fold_one_imm_f64(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_fold_one_imm_f64: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x8b145769 +; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a +; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_one_imm_f64: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769 +; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a +; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one double 2.718281828459045, %arg + %sel = select i1 %cmp, double %other, double 2.718281828459045 + ret double %sel +} + +; Should NOT be folded: f64 fcmp oeq with constant in other position +define double @fcmp_select_no_fold_f64_other_pos(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_f64_other_pos: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX900-NEXT: s_mov_b32 s5, 0x400921fb +; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18 +; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb +; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f64_other_pos: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb +; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double %arg, 
3.141592653589793 + %sel = select i1 %cmp, double %other, double 3.141592653589793 + ret double %sel +} + +; Should NOT be folded: f64 fcmp unsupported comparison type +define double @fcmp_select_no_fold_f64_unsupported_cmp(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_f64_unsupported_cmp: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX900-NEXT: s_mov_b32 s5, 0x400921fb +; GFX900-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18 +; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb +; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f64_unsupported_cmp: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb +; GFX1010-NEXT: v_cmp_gt_f64_e32 vcc_lo, s[4:5], v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp olt double %arg, 3.141592653589793 + %sel = select i1 %cmp, double %other, double 3.141592653589793 + ret double %sel +} + +; Should NOT be folded: imm can be encoded into cndmask +define double @fcmp_select_no_fold_f64_enc_imm(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 1.0, v[0:1] +; GFX900-NEXT: v_mov_b32_e32 v1, 0x3ff00000 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x3ff00000, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double %arg, 1.0 + %sel = select i1 %cmp, double 1.0, double %other + ret double %sel +} + +; Should NOT be folded: imm can be encoded into cndmask +define double @fcmp_select_no_fold_f64_enc_imm_2(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm_2: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, -4.0, v[0:1] +; GFX900-NEXT: v_mov_b32_e32 v1, 0xc0100000 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm_2: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, -4.0, v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xc0100000, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one double -4.0, %arg + %sel = select i1 %cmp, double %other, double -4.0 + ret double %sel +} + +; Should NOT be folded: f64 fcmp oeq with zero constant +define double @fcmp_select_no_fold_oeq_f64_zero(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_oeq_f64_zero: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 0, v[0:1] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_oeq_f64_zero: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0, v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, 
vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double %arg, 0.0 + %sel = select i1 %cmp, double 0.0, double %other + ret double %sel +} + +; Should NOT be folded: f64 fcmp one with negative zero constant +define double @fcmp_select_no_fold_one_f64_negzero(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_one_f64_negzero: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0 +; GFX900-NEXT: s_brev_b32 s5, 1 +; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_one_f64_negzero: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, 0x80000000, v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x80000000, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one double -0.0, %arg + %sel = select i1 %cmp, double %other, double -0.0 + ret double %sel +} + +; Should NOT be folded: f64 different constants +define double @fcmp_select_no_fold_f64_different_const(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_f64_different_const: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX900-NEXT: s_mov_b32 s5, 0x400921fb +; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_mov_b32_e32 v4, 0x8b145769 +; GFX900-NEXT: v_mov_b32_e32 v1, 0x4005bf0a +; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f64_different_const: +; GFX1010: ; %bb.0: ; 
%entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18 +; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb +; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, s[4:5], v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8b145769, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x4005bf0a, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double %arg, 3.141592653589793 + %sel = select i1 %cmp, double 2.718281828459045, double %other + ret double %sel +} + +; Should NOT be folded: f64 fcmp oeq with NaN constant +; fcmp oeq with NaN always returns false, so select always chooses %other +define double @fcmp_select_no_fold_nan_f64(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_nan_f64: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v1, v3 +; GFX900-NEXT: v_mov_b32_e32 v0, v2 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_nan_f64: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_mov_b32_e32 v1, v3 +; GFX1010-NEXT: v_mov_b32_e32 v0, v2 +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double %arg, 0x7FF8000000000000 + %sel = select i1 %cmp, double 0x7FF8000000000000, double %other + ret double %sel +} + +; Should NOT be folded: f64 fcmp oeq with NaN constant (commutative variant) +; fcmp oeq with NaN always returns false, so select always chooses %other +define double @fcmp_select_no_fold_nan_f64_comm(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_nan_f64_comm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v1, v3 +; GFX900-NEXT: v_mov_b32_e32 v0, v2 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_comm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: 
v_mov_b32_e32 v1, v3 +; GFX1010-NEXT: v_mov_b32_e32 v0, v2 +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double 0x7FF8000000000000, %arg + %sel = select i1 %cmp, double 0x7FF8000000000000, double %other + ret double %sel +} + +; Should NOT be folded: f64 fcmp one with NaN constant +; fcmp one with NaN always returns false, so select always chooses the NaN constant +define double @fcmp_select_no_fold_nan_f64_one(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, 0 +; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_mov_b32_e32 v0, 0 +; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000 +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one double %arg, 0x7FF8000000000000 + %sel = select i1 %cmp, double %other, double 0x7FF8000000000000 + ret double %sel +} + +; Should NOT be folded: f64 fcmp one with NaN constant (commutative variant) +; fcmp one with NaN always returns false, so select always chooses the NaN constant +define double @fcmp_select_no_fold_nan_f64_one_comm(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one_comm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, 0 +; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one_comm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_mov_b32_e32 v0, 0 +; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000 +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one double 0x7FF8000000000000, %arg + %sel = select i1 %cmp, double %other, 
double 0x7FF8000000000000 + ret double %sel +} + +; Should NOT be folded: f64 fcmp oeq with positive infinity +; Infinity values should bypass the optimization, generating unfolded code +define double @fcmp_select_no_fold_posinf_f64(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_posinf_f64: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0 +; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double %arg, 0x7FF0000000000000 + %sel = select i1 %cmp, double 0x7FF0000000000000, double %other + ret double %sel +} + +; Should NOT be folded: f64 fcmp oeq with negative infinity +; Infinity values should bypass the optimization, generating unfolded code +define double @fcmp_select_no_fold_neginf_f64(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_neginf_f64: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0 +; GFX900-NEXT: s_mov_b32 s5, 0xfff00000 +; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double %arg, 0xFFF0000000000000 + %sel = select i1 %cmp, double 0xFFF0000000000000, double %other + ret double %sel +} + +; Should NOT be folded: f64 fcmp oeq with positive infinity (commutative variant) +; Infinity values should bypass the optimization, generating unfolded code +define double @fcmp_select_no_fold_posinf_f64_comm(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_posinf_f64_comm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0 +; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64_comm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double 0x7FF0000000000000, %arg + %sel = select i1 %cmp, double 0x7FF0000000000000, double %other + ret double %sel +} + +; Should NOT be folded: f64 fcmp oeq with negative infinity (commutative variant) +; Infinity values should bypass the optimization, generating unfolded code +define double @fcmp_select_no_fold_neginf_f64_comm(double %arg, double %other) { +; GFX900-LABEL: fcmp_select_no_fold_neginf_f64_comm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0 +; GFX900-NEXT: 
s_mov_b32 s5, 0xfff00000 +; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1] +; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64_comm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1] +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq double 0xFFF0000000000000, %arg + %sel = select i1 %cmp, double 0xFFF0000000000000, double %other + ret double %sel +} + +;------------------------------------------------------------------------------ +; F16 Tests +;------------------------------------------------------------------------------ + +; Should be folded: f16 fcmp oeq + select with constant in true value +define half @fcmp_select_fold_oeq_f16_imm(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_fold_oeq_f16_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_movk_i32 s4, 0x4248 +; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_oeq_f16_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq half %arg, 0xH4248 + %sel = select i1 %cmp, half 0xH4248, half %other + ret half %sel +} + +; Should be folded: f16 fcmp oeq + select with constant in true value (commutative) +define half @fcmp_select_fold_oeq_imm_f16(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_fold_oeq_imm_f16: +; GFX900: ; 
%bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_movk_i32 s4, 0x4248 +; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f16: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq half 0xH4248, %arg + %sel = select i1 %cmp, half 0xH4248, half %other + ret half %sel +} + +; Should be folded: f16 fcmp one + select with constant in false value +define half @fcmp_select_fold_one_f16_imm(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_fold_one_f16_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_movk_i32 s4, 0x4020 +; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_one_f16_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one half %arg, 0xH4020 + %sel = select i1 %cmp, half %other, half 0xH4020 + ret half %sel +} + +; Should be folded: f16 fcmp one + select with constant in false value (commutative) +define half @fcmp_select_fold_one_imm_f16(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_fold_one_imm_f16: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_movk_i32 s4, 0x4020 +; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_one_imm_f16: +; 
GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one half 0xH4020, %arg + %sel = select i1 %cmp, half %other, half 0xH4020 + ret half %sel +} + +; Should NOT be folded: different constants +define half @fcmp_select_no_fold_f16_different_const(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_no_fold_f16_different_const: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_movk_i32 s4, 0x4248 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300 +; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f16_different_const: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0x4248, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq half %arg, 0xH4248 + %sel = select i1 %cmp, half 0xH4300, half %other + ret half %sel +} + +; Should NOT be folded: f16 fcmp oeq with NaN constant; fcmp oeq with NaN always returns false, so select always chooses %other +define half @fcmp_select_no_fold_nan_f16(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_no_fold_nan_f16: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_nan_f16: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_mov_b32_e32 v0, v1 +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq half %arg, 0xH7e00 + %sel = select i1 %cmp, half 0xH7e00, half %other + ret half %sel +} + +; Should NOT be folded: f16 fcmp one with NaN constant; fcmp one with NaN always returns false, so select always chooses the NaN constant +define half
@fcmp_select_no_fold_nan_f16_one(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_no_fold_nan_f16_one: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, 0x7e00 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_nan_f16_one: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7e00 +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one half %arg, 0xH7e00 + %sel = select i1 %cmp, half %other, half 0xH7e00 + ret half %sel +} + +; Should NOT be folded: f16 fcmp one with +Inf constant +define half @fcmp_select_no_fold_posinf_f16_one(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_no_fold_posinf_f16_one: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_movk_i32 s4, 0x7c00 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x7c00 +; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_posinf_f16_one: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x7c00, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one half %arg, 0xH7c00 + %sel = select i1 %cmp, half %other, half 0xH7c00 + ret half %sel +} + +; Should NOT be folded: f16 fcmp one with -Inf constant +define half @fcmp_select_no_fold_neginf_f16_one(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_no_fold_neginf_f16_one: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0xfc00 +; GFX900-NEXT: v_mov_b32_e32 v2, 0xfc00 +; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX1010-LABEL: fcmp_select_no_fold_neginf_f16_one: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0xfc00, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xfc00, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one half %arg, 0xHfc00 + %sel = select i1 %cmp, half %other, half 0xHfc00 + ret half %sel +} +; Should NOT be folded: f16 fcmp oeq with zero constant +define half @fcmp_select_no_fold_oeq_f16_zero(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_no_fold_oeq_f16_zero: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, 0, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_oeq_f16_zero: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq half %arg, 0xH0000 + %sel = select i1 %cmp, half 0xH0000, half %other + ret half %sel +} +; Should NOT be folded: f16 fcmp one with negative zero constant +define half @fcmp_select_no_fold_one_f16_negzero(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_no_fold_one_f16_negzero: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s4, 0x8000 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x8000 +; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_one_f16_negzero: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x8000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8000, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = 
fcmp one half 0xH8000, %arg + %sel = select i1 %cmp, half %other, half 0xH8000 + ret half %sel +} + +; Should NOT be folded: f16 fcmp oeq with constant in other position +define half @fcmp_select_no_fold_f16_other_pos(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_no_fold_f16_other_pos: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_movk_i32 s4, 0x4248 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248 +; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f16_other_pos: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq half %arg, 0xH4248 + %sel = select i1 %cmp, half %other, half 0xH4248 + ret half %sel +} + +; Should NOT be folded: f16 unsupported comparison type +define half @fcmp_select_no_fold_f16_unsupported_cmp(half %arg, half %other) { +; GFX900-LABEL: fcmp_select_no_fold_f16_unsupported_cmp: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_movk_i32 s4, 0x4248 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248 +; GFX900-NEXT: v_cmp_gt_f16_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_f16_unsupported_cmp: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0x4248, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp olt half %arg, 0xH4248 + %sel = select i1 %cmp, half %other, half 0xH4248 + ret half %sel +} + +;------------------------------------------------------------------------------ +; BF16 
Tests +;------------------------------------------------------------------------------ + +; Should be folded: bfloat fcmp oeq + select with constant in true value +define bfloat @fcmp_select_fold_oeq_bf16_imm(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_fold_oeq_bf16_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x42480000 +; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_oeq_bf16_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq bfloat %arg, 0xR4248 + %sel = select i1 %cmp, bfloat 0xR4248, bfloat %other + ret bfloat %sel +} + +; Should be folded: bfloat fcmp oeq + select with constant in true value (commutative) +define bfloat @fcmp_select_fold_oeq_imm_bf16(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_fold_oeq_imm_bf16: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x42480000 +; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_oeq_imm_bf16: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq bfloat 0xR4248, %arg + %sel = select i1 %cmp, bfloat 0xR4248, bfloat %other 
+ ret bfloat %sel +} + +; Should be folded: bfloat fcmp one + select with constant in false value +define bfloat @fcmp_select_fold_one_bf16_imm(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_fold_one_bf16_imm: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x40200000 +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_one_bf16_imm: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one bfloat %arg, 0xR4020 + %sel = select i1 %cmp, bfloat %other, bfloat 0xR4020 + ret bfloat %sel +} + +; Should be folded: bfloat fcmp one + select with constant in false value (commutative) +define bfloat @fcmp_select_fold_one_imm_bf16(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_fold_one_imm_bf16: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x40200000 +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_fold_one_imm_bf16: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one bfloat 0xR4020, %arg + %sel = select i1 %cmp, bfloat %other, bfloat 0xR4020 + ret bfloat %sel +} + +; Should NOT be folded: different 
constants +define bfloat @fcmp_select_no_fold_bf16_different_const(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_no_fold_bf16_different_const: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x42480000 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300 +; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_bf16_different_const: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42480000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq bfloat %arg, 0xR4248 + %sel = select i1 %cmp, bfloat 0xR4300, bfloat %other + ret bfloat %sel +} + +; Should NOT be folded: bfloat fcmp oeq with NaN constant; fcmp oeq with NaN always returns false, so select always chooses %other +define bfloat @fcmp_select_no_fold_nan_bf16(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_no_fold_nan_bf16: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_mov_b32_e32 v0, v1 +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq bfloat %arg, 0xR7FC0 + %sel = select i1 %cmp, bfloat 0xR7FC0, bfloat %other + ret bfloat %sel +} + +; Should NOT be folded: bfloat fcmp one with NaN constant; fcmp one with NaN always returns false, so select always chooses the NaN constant +define bfloat @fcmp_select_no_fold_nan_bf16_one(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_no_fold_nan_bf16_one: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc0 +; GFX900-NEXT: s_setpc_b64
s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16_one: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc0 +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one bfloat %arg, 0xR7FC0 + %sel = select i1 %cmp, bfloat %other, bfloat 0xR7FC0 + ret bfloat %sel +} + +; Should NOT be folded: bfloat fcmp one with +Inf constant +define bfloat @fcmp_select_no_fold_posinf_bf16_one(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_no_fold_posinf_bf16_one: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f80 +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_posinf_bf16_one: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x7f800000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f80, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one bfloat %arg, 0xR7F80 + %sel = select i1 %cmp, bfloat %other, bfloat 0xR7F80 + ret bfloat %sel +} + +; Should NOT be folded: bfloat fcmp one with -Inf constant +define bfloat @fcmp_select_no_fold_neginf_bf16_one(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_no_fold_neginf_bf16_one: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX900-NEXT: s_mov_b32 s4, 0xff800000 +; GFX900-NEXT: v_mov_b32_e32 v2, 0xffffff80 +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_neginf_bf16_one: +; GFX1010: ; %bb.0: ; %entry +; 
GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffffff80, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one bfloat %arg, 0xRFF80 + %sel = select i1 %cmp, bfloat %other, bfloat 0xRFF80 + ret bfloat %sel +} + +; Should NOT be folded: bfloat fcmp oeq with zero constant +define bfloat @fcmp_select_no_fold_oeq_bf16_zero(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_no_fold_oeq_bf16_zero: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_oeq_bf16_zero: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq bfloat %arg, 0xR0000 + %sel = select i1 %cmp, bfloat 0xR0000, bfloat %other + ret bfloat %sel +} + +; Should NOT be folded: bfloat fcmp one with negative zero constant +define bfloat @fcmp_select_no_fold_one_bf16_negzero(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_no_fold_one_bf16_negzero: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX900-NEXT: s_brev_b32 s4, 1 +; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_one_bf16_negzero: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: 
v_lshlrev_b32_e32 v0, 16, v0 +; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffff8000, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp one bfloat 0xR8000, %arg + %sel = select i1 %cmp, bfloat %other, bfloat 0xR8000 + ret bfloat %sel +} + +; Should NOT be folded: bfloat fcmp oeq with constant in other position +define bfloat @fcmp_select_no_fold_bf16_other_pos(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_no_fold_bf16_other_pos: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x42480000 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248 +; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_bf16_other_pos: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp oeq bfloat %arg, 0xR4248 + %sel = select i1 %cmp, bfloat %other, bfloat 0xR4248 + ret bfloat %sel +} + +; Should NOT be folded: bfloat unsupported comparison type +define bfloat @fcmp_select_no_fold_bf16_unsupported_cmp(bfloat %arg, bfloat %other) { +; GFX900-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX900-NEXT: s_mov_b32 s4, 0x42480000 +; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248 +; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42480000, v0 +; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo +; GFX1010-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = fcmp olt bfloat %arg, 0xR4248 + %sel = select i1 %cmp, bfloat %other, bfloat 0xR4248 + ret bfloat %sel +} |