aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll1429
1 files changed, 1429 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll b/llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll
new file mode 100644
index 0000000..11af704
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/select-cmp-shared-constant-fp.ll
@@ -0,0 +1,1429 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX1010 %s
+
+; Test the CMP+SELECT optimization that folds shared constants to reduce
+; register pressure.
+
+;------------------------------------------------------------------------------
+; F32 Tests
+;------------------------------------------------------------------------------
+
+; Should be folded: fcmp oeq + select with the compared constant as the true value (cndmask reads v0 instead)
+define float @fcmp_select_fold_oeq_f32_imm(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_fold_oeq_f32_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_oeq_f32_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq float %arg, 0x40490FDB00000000 ; double-format hex; f32 bits are 0x42487ed8 per the CHECK lines
+ %sel = select i1 %cmp, float 0x40490FDB00000000, float %other
+ ret float %sel
+}
+
+; Should be folded: fcmp oeq with the constant as the LHS operand (commuted) + select with it as the true value
+define float @fcmp_select_fold_oeq_imm_f32(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_fold_oeq_imm_f32:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f32:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq float 0x40490FDB00000000, %arg
+ %sel = select i1 %cmp, float 0x40490FDB00000000, float %other
+ ret float %sel
+}
+
+; Should be folded: fcmp one + select with the compared constant as the false value (cndmask reads v0 instead)
+define float @fcmp_select_fold_one_f32_imm(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_fold_one_f32_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x402df850
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_one_f32_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one float %arg, 0x4005BF0A00000000
+ %sel = select i1 %cmp, float %other, float 0x4005BF0A00000000
+ ret float %sel
+}
+
+; Should be folded: fcmp one with the constant as the LHS operand (commuted) + select with it as the false value
+define float @fcmp_select_fold_one_imm_f32(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_fold_one_imm_f32:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x402df850
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_one_imm_f32:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x402df850, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one float 0x4005BF0A00000000, %arg
+ %sel = select i1 %cmp, float %other, float 0x4005BF0A00000000
+ ret float %sel
+}
+
+; Should NOT be folded: the select's true value is a different constant from the one being compared
+define float @fcmp_select_no_fold_f32_different_const(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f32_different_const:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x46487ed8
+; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f32_different_const:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42487ed8, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x46487ed8, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq float %arg, 0x40490FDB00000000
+ %sel = select i1 %cmp, float 0x40C90FDB00000000, float %other
+ ret float %sel
+}
+
+; Should NOT be folded: fcmp oeq with the constant as the select's false value (chosen only when the compare fails)
+define float @fcmp_select_no_fold_f32_other_pos(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f32_other_pos:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f32_other_pos:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42487ed8, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq float %arg, 0x40490FDB00000000
+ %sel = select i1 %cmp, float %other, float 0x40490FDB00000000
+ ret float %sel
+}
+
+; Should NOT be folded: unsupported comparison predicate (fcmp olt is not an equality test)
+define float @fcmp_select_no_fold_f32_unsupported_cmp(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f32_unsupported_cmp:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x42487ed8
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x42487ed8
+; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f32_unsupported_cmp:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42487ed8, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x42487ed8, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp olt float %arg, 0x40490FDB00000000
+ %sel = select i1 %cmp, float %other, float 0x40490FDB00000000
+ ret float %sel
+}
+
+; Should NOT be folded: 1.0 is encoded directly as a cndmask operand, so no register holds the constant
+define float @fcmp_select_no_fold_f32_enc_imm(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 1.0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 1.0, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq float %arg, 1.0
+ %sel = select i1 %cmp, float 1.0, float %other
+ ret float %sel
+}
+
+; Should NOT be folded: -4.0 is encoded directly as a cndmask operand (fcmp one, constant on the LHS)
+define float @fcmp_select_no_fold_f32_enc_imm_2(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f32_enc_imm_2:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, -4.0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f32_enc_imm_2:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, -4.0, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one float -4.0, %arg
+ %sel = select i1 %cmp, float %other, float -4.0
+ ret float %sel
+}
+
+; Should NOT be folded: fcmp oeq with zero constant (oeq is also true for %arg == -0.0, so %arg cannot stand in for 0.0)
+define float @fcmp_select_no_fold_oeq_f32_zero(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_zero:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_zero:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq float %arg, 0.0
+ %sel = select i1 %cmp, float 0.0, float %other
+ ret float %sel
+}
+
+; Should NOT be folded: fcmp one with negative zero constant (one is also false for %arg == +0.0)
+define float @fcmp_select_no_fold_one_f32_negzero(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_one_f32_negzero:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_brev_b32 s4, 1
+; GFX900-NEXT: v_bfrev_b32_e32 v2, 1
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_one_f32_negzero:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x80000000, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one float -0.0, %arg ; -0.0 is 0x8000000000000000 in IR hex form (f32 bits 0x80000000)
+ %sel = select i1 %cmp, float %other, float -0.0 ; same -0.0 constant as the compare
+ ret float %sel
+}
+
+; Should NOT be folded: NaN constants bypass the optimization because of their IEEE 754 comparison behavior
+; fcmp oeq with NaN always returns false, so the select always chooses %other (a plain copy of v1)
+define float @fcmp_select_no_fold_oeq_f32_nan(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_oeq_f32_nan:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_oeq_f32_nan:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v0, v1
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq float %arg, 0x7FF8000000000000
+ %sel = select i1 %cmp, float 0x7FF8000000000000, float %other
+ ret float %sel
+}
+
+; Should NOT be folded: NaN constants bypass the optimization because of their IEEE 754 comparison behavior
+; fcmp one with NaN always returns false, so the select always chooses the NaN constant (0x7fc00000)
+define float @fcmp_select_no_fold_one_f32_nan(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_one_f32_nan:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc00000
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_one_f32_nan:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc00000
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one float 0x7FF8000000000000, %arg
+ %sel = select i1 %cmp, float %other, float 0x7FF8000000000000
+ ret float %sel
+}
+
+; Should NOT be folded: fcmp oeq with positive infinity
+; Infinity values should bypass the optimization, generating unfolded code
+define float @fcmp_select_no_fold_posinf_oeq_f32(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_posinf_oeq_f32:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_posinf_oeq_f32:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x7f800000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq float %arg, 0x7FF0000000000000
+ %sel = select i1 %cmp, float 0x7FF0000000000000, float %other
+ ret float %sel
+}
+
+; Should NOT be folded: fcmp one with negative infinity (constant on the LHS, also the select's false value)
+; Infinity values should bypass the optimization, generating unfolded code
+define float @fcmp_select_no_fold_neginf_f32_one(float %arg, float %other) {
+; GFX900-LABEL: fcmp_select_no_fold_neginf_f32_one:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0xff800000
+; GFX900-NEXT: v_mov_b32_e32 v2, 0xff800000
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_neginf_f32_one:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xff800000, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one float 0xFFF0000000000000, %arg
+ %sel = select i1 %cmp, float %other, float 0xFFF0000000000000
+ ret float %sel
+}
+
+;------------------------------------------------------------------------------
+; F64 Tests
+;------------------------------------------------------------------------------
+
+; Should be folded: f64 fcmp oeq + select with the compared constant as the true value (both cndmasks read v[0:1])
+define double @fcmp_select_fold_oeq_f64_imm(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_fold_oeq_f64_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_oeq_f64_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double %arg, 3.141592653589793
+ %sel = select i1 %cmp, double 3.141592653589793, double %other
+ ret double %sel
+}
+; Should be folded: f64 fcmp oeq with the constant as the LHS operand (commuted) + select with it as the true value
+define double @fcmp_select_fold_oeq_imm_f64(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_fold_oeq_imm_f64:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f64:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double 3.141592653589793, %arg
+ %sel = select i1 %cmp, double 3.141592653589793, double %other
+ ret double %sel
+}
+
+; Should be folded: f64 fcmp one + select with the compared constant as the false value (both cndmasks read v[0:1])
+define double @fcmp_select_fold_one_f64_imm(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_fold_one_f64_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x8b145769
+; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a
+; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_one_f64_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769
+; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a
+; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one double %arg, 2.718281828459045
+ %sel = select i1 %cmp, double %other, double 2.718281828459045
+ ret double %sel
+}
+; Should be folded: f64 fcmp one with the constant as the LHS operand (commuted) + select with it as the false value
+define double @fcmp_select_fold_one_imm_f64(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_fold_one_imm_f64:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x8b145769
+; GFX900-NEXT: s_mov_b32 s5, 0x4005bf0a
+; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_one_imm_f64:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_mov_b32 s4, 0x8b145769
+; GFX1010-NEXT: s_mov_b32 s5, 0x4005bf0a
+; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one double 2.718281828459045, %arg
+ %sel = select i1 %cmp, double %other, double 2.718281828459045
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp oeq with the constant as the select's false value (chosen only when the compare fails)
+define double @fcmp_select_no_fold_f64_other_pos(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f64_other_pos:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX900-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f64_other_pos:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX1010-NEXT: v_cmp_eq_f64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double %arg, 3.141592653589793
+ %sel = select i1 %cmp, double %other, double 3.141592653589793
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp with an unsupported predicate (olt is not an equality test)
+define double @fcmp_select_no_fold_f64_unsupported_cmp(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f64_unsupported_cmp:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX900-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v4, 0x54442d18
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x400921fb
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f64_unsupported_cmp:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX1010-NEXT: v_cmp_gt_f64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x54442d18, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x400921fb, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp olt double %arg, 3.141592653589793
+ %sel = select i1 %cmp, double %other, double 3.141592653589793
+ ret double %sel
+}
+
+; Should NOT be folded: 1.0 is encoded directly in the f64 v_cmp (GFX900 still needs a v_mov for the high half)
+define double @fcmp_select_no_fold_f64_enc_imm(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 1.0, v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x3ff00000
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 1.0, v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x3ff00000, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double %arg, 1.0
+ %sel = select i1 %cmp, double 1.0, double %other
+ ret double %sel
+}
+
+; Should NOT be folded: -4.0 is encoded directly in the f64 v_cmp (fcmp one, constant on the LHS)
+define double @fcmp_select_no_fold_f64_enc_imm_2(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f64_enc_imm_2:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, -4.0, v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0xc0100000
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f64_enc_imm_2:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, -4.0, v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xc0100000, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one double -4.0, %arg
+ %sel = select i1 %cmp, double %other, double -4.0
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp oeq with zero constant (oeq is also true for %arg == -0.0, so %arg cannot stand in for 0.0)
+define double @fcmp_select_no_fold_oeq_f64_zero(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_oeq_f64_zero:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, 0, v[0:1]
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_oeq_f64_zero:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0, v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double %arg, 0.0
+ %sel = select i1 %cmp, double 0.0, double %other
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp one with negative zero constant (one is also false for %arg == +0.0)
+define double @fcmp_select_no_fold_one_f64_negzero(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_one_f64_negzero:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0
+; GFX900-NEXT: s_brev_b32 s5, 1
+; GFX900-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_bfrev_b32_e32 v1, 1
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_one_f64_negzero:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f64_e32 vcc_lo, 0x80000000, v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x80000000, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one double -0.0, %arg
+ %sel = select i1 %cmp, double %other, double -0.0
+ ret double %sel
+}
+
+; Should NOT be folded: f64 select's true value is a different constant from the one being compared
+define double @fcmp_select_no_fold_f64_different_const(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f64_different_const:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX900-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v4, 0x8b145769
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x4005bf0a
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f64_different_const:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_mov_b32 s4, 0x54442d18
+; GFX1010-NEXT: s_mov_b32 s5, 0x400921fb
+; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8b145769, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x4005bf0a, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double %arg, 3.141592653589793
+ %sel = select i1 %cmp, double 2.718281828459045, double %other
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp oeq with NaN constant
+; fcmp oeq with NaN always returns false, so the select always chooses %other (a plain copy of v[2:3])
+define double @fcmp_select_no_fold_nan_f64(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_nan_f64:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_nan_f64:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v1, v3
+; GFX1010-NEXT: v_mov_b32_e32 v0, v2
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double %arg, 0x7FF8000000000000
+ %sel = select i1 %cmp, double 0x7FF8000000000000, double %other
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp oeq with NaN constant (commuted variant, NaN on the LHS)
+; fcmp oeq with NaN always returns false, so the select always chooses %other (a plain copy of v[2:3])
+define double @fcmp_select_no_fold_nan_f64_comm(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_nan_f64_comm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_comm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v1, v3
+; GFX1010-NEXT: v_mov_b32_e32 v0, v2
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double 0x7FF8000000000000, %arg
+ %sel = select i1 %cmp, double 0x7FF8000000000000, double %other
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp one with NaN constant
+; fcmp one with NaN always returns false, so the select always chooses the NaN constant (0x7ff80000 high, 0 low)
+define double @fcmp_select_no_fold_nan_f64_one(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v0, 0
+; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one double %arg, 0x7FF8000000000000
+ %sel = select i1 %cmp, double %other, double 0x7FF8000000000000
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp one with NaN constant (commuted variant, NaN on the LHS)
+; fcmp one with NaN always returns false, so the select always chooses the NaN constant (0x7ff80000 high, 0 low)
+define double @fcmp_select_no_fold_nan_f64_one_comm(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_nan_f64_one_comm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_nan_f64_one_comm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v0, 0
+; GFX1010-NEXT: v_mov_b32_e32 v1, 0x7ff80000
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one double 0x7FF8000000000000, %arg
+ %sel = select i1 %cmp, double %other, double 0x7FF8000000000000
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp oeq with positive infinity (also the select's true value)
+; Infinity values should bypass the optimization, so the cndmask pair still takes the constant rather than v[0:1]
+define double @fcmp_select_no_fold_posinf_f64(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_posinf_f64:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0
+; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000
+; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double %arg, 0x7FF0000000000000
+ %sel = select i1 %cmp, double 0x7FF0000000000000, double %other
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp oeq with negative infinity (also the select's true value)
+; Infinity values should bypass the optimization, so the cndmask pair still takes the constant rather than v[0:1]
+define double @fcmp_select_no_fold_neginf_f64(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_neginf_f64:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0
+; GFX900-NEXT: s_mov_b32 s5, 0xfff00000
+; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double %arg, 0xFFF0000000000000
+ %sel = select i1 %cmp, double 0xFFF0000000000000, double %other
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp oeq against +Inf (0x7FF0000000000000),
+; commuted variant with the constant as the FIRST fcmp operand.
+; Infinity constants are expected to bypass the optimization regardless of the
+; operand order. The assertions below show unfolded code: the select still
+; materializes both halves of the constant (0 and 0x7ff00000) instead of
+; reusing %arg from v[0:1].
+define double @fcmp_select_no_fold_posinf_f64_comm(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_posinf_f64_comm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0
+; GFX900-NEXT: s_mov_b32 s5, 0x7ff00000
+; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff00000
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_posinf_f64_comm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x7ff00000, v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0x7ff00000, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double 0x7FF0000000000000, %arg
+ %sel = select i1 %cmp, double 0x7FF0000000000000, double %other
+ ret double %sel
+}
+
+; Should NOT be folded: f64 fcmp oeq against -Inf (0xFFF0000000000000),
+; commuted variant with the constant as the FIRST fcmp operand.
+; Infinity constants are expected to bypass the optimization regardless of the
+; operand order. The assertions below show unfolded code: the select still
+; materializes both halves of the constant (0 and 0xfff00000) instead of
+; reusing %arg from v[0:1].
+define double @fcmp_select_no_fold_neginf_f64_comm(double %arg, double %other) {
+; GFX900-LABEL: fcmp_select_no_fold_neginf_f64_comm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0
+; GFX900-NEXT: s_mov_b32 s5, 0xfff00000
+; GFX900-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
+; GFX900-NEXT: v_mov_b32_e32 v1, 0xfff00000
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_neginf_f64_comm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0xfff00000, v[0:1]
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e32 v1, 0xfff00000, v3, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq double 0xFFF0000000000000, %arg
+ %sel = select i1 %cmp, double 0xFFF0000000000000, double %other
+ ret double %sel
+}
+
+;------------------------------------------------------------------------------
+; F16 Tests
+;------------------------------------------------------------------------------
+
+; Should be folded: f16 fcmp oeq + select with the same constant (0xH4248,
+; i.e. 3.140625) as the select's true value.
+; Folded form in the assertions below: the constant appears only as the compare
+; operand, and v_cndmask picks %arg (v0) when the compare is true, so no second
+; copy of the constant is materialized for the select.
+define half @fcmp_select_fold_oeq_f16_imm(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_fold_oeq_f16_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_movk_i32 s4, 0x4248
+; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_oeq_f16_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq half %arg, 0xH4248
+ %sel = select i1 %cmp, half 0xH4248, half %other
+ ret half %sel
+}
+
+; Should be folded: f16 fcmp oeq + select with the same constant (0xH4248) as
+; the select's true value; commuted variant with the constant as the FIRST
+; fcmp operand.
+; Folded form in the assertions below: the constant appears only as the compare
+; operand, and v_cndmask picks %arg (v0) when the compare is true.
+define half @fcmp_select_fold_oeq_imm_f16(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_fold_oeq_imm_f16:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_movk_i32 s4, 0x4248
+; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_oeq_imm_f16:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq half 0xH4248, %arg
+ %sel = select i1 %cmp, half 0xH4248, half %other
+ ret half %sel
+}
+
+; Should be folded: f16 fcmp one + select with the same constant (0xH4020,
+; i.e. 2.0625) as the select's false value.
+; Folded form in the assertions below: the constant appears only as the compare
+; operand, and v_cndmask falls back to %arg (v0) when the compare is false, so
+; no second copy of the constant is materialized for the select.
+define half @fcmp_select_fold_one_f16_imm(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_fold_one_f16_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_movk_i32 s4, 0x4020
+; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_one_f16_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one half %arg, 0xH4020
+ %sel = select i1 %cmp, half %other, half 0xH4020
+ ret half %sel
+}
+
+; Should be folded: f16 fcmp one + select with the same constant (0xH4020) as
+; the select's false value; commuted variant with the constant as the FIRST
+; fcmp operand.
+; Folded form in the assertions below: the constant appears only as the compare
+; operand, and v_cndmask falls back to %arg (v0) when the compare is false.
+define half @fcmp_select_fold_one_imm_f16(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_fold_one_imm_f16:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_movk_i32 s4, 0x4020
+; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_one_imm_f16:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x4020, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one half 0xH4020, %arg
+ %sel = select i1 %cmp, half %other, half 0xH4020
+ ret half %sel
+}
+
+; Should NOT be folded: the compare constant (0xH4248) and the select constant
+; (0xH4300) differ, so %arg cannot stand in for the selected value.
+; The assertions below show both constants being used: 0x4248 by the compare
+; and 0x4300 by v_cndmask.
+define half @fcmp_select_no_fold_f16_different_const(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f16_different_const:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_movk_i32 s4, 0x4248
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300
+; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f16_different_const:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0x4248, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq half %arg, 0xH4248
+ %sel = select i1 %cmp, half 0xH4300, half %other
+ ret half %sel
+}
+
+; Should NOT be folded: f16 fcmp oeq against a NaN constant (0xH7e00).
+; An ordered compare against NaN is always false, and the assertions below
+; contain no compare instruction at all: the function simply returns %other
+; (v1 copied to v0).
+define half @fcmp_select_no_fold_nan_f16(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_no_fold_nan_f16:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_nan_f16:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v0, v1
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq half %arg, 0xH7e00
+ %sel = select i1 %cmp, half 0xH7e00, half %other
+ ret half %sel
+}
+
+; Should NOT be folded: f16 fcmp one against a NaN constant (0xH7e00), with the
+; same NaN as the select's false value.
+; An ordered compare against NaN is always false, and the assertions below
+; contain no compare instruction at all: the function simply returns the
+; constant 0x7e00.
+define half @fcmp_select_no_fold_nan_f16_one(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_no_fold_nan_f16_one:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0x7e00
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_nan_f16_one:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7e00
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one half %arg, 0xH7e00
+ %sel = select i1 %cmp, half %other, half 0xH7e00
+ ret half %sel
+}
+
+; Should NOT be folded: f16 fcmp one against +Inf (0xH7c00), with the same +Inf
+; constant as the select's false value.
+; The assertions below show unfolded code: the constant 0x7c00 is used by both
+; the compare and v_cndmask instead of reusing %arg (v0) in the select.
+define half @fcmp_select_no_fold_posinf_f16_one(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_no_fold_posinf_f16_one:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_movk_i32 s4, 0x7c00
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7c00
+; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_posinf_f16_one:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x7c00, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one half %arg, 0xH7c00
+ %sel = select i1 %cmp, half %other, half 0xH7c00
+ ret half %sel
+}
+
+; Should NOT be folded: f16 fcmp one against -Inf (0xHfc00), with the same -Inf
+; constant as the select's false value.
+; The assertions below show unfolded code: the constant 0xfc00 is used by both
+; the compare and v_cndmask instead of reusing %arg (v0) in the select.
+define half @fcmp_select_no_fold_neginf_f16_one(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_no_fold_neginf_f16_one:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0xfc00
+; GFX900-NEXT: v_mov_b32_e32 v2, 0xfc00
+; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_neginf_f16_one:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0xfc00, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xfc00, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one half %arg, 0xHfc00
+ %sel = select i1 %cmp, half %other, half 0xHfc00
+ ret half %sel
+}
+
+; Should NOT be folded: f16 fcmp oeq against +0.0 (0xH0000), with the same zero
+; constant as the select's true value.
+; In the assertions below, 0 is written directly as an operand of both the
+; compare and v_cndmask, so no register is spent on the constant and %arg (v0)
+; is not reused by the select.
+define half @fcmp_select_no_fold_oeq_f16_zero(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_no_fold_oeq_f16_zero:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cmp_neq_f16_e32 vcc, 0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_oeq_f16_zero:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq half %arg, 0xH0000
+ %sel = select i1 %cmp, half 0xH0000, half %other
+ ret half %sel
+}
+
+; Should NOT be folded: f16 fcmp one against -0.0 (0xH8000), with the same
+; constant as the select's false value. The constant is the FIRST fcmp operand
+; here (commuted form).
+; The assertions below show unfolded code: the constant 0x8000 is used by both
+; the compare and v_cndmask instead of reusing %arg (v0) in the select.
+define half @fcmp_select_no_fold_one_f16_negzero(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_no_fold_one_f16_negzero:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_mov_b32 s4, 0x8000
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x8000
+; GFX900-NEXT: v_cmp_lg_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_one_f16_negzero:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0x8000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x8000, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one half 0xH8000, %arg
+ %sel = select i1 %cmp, half %other, half 0xH8000
+ ret half %sel
+}
+
+; Should NOT be folded: f16 fcmp oeq where the shared constant (0xH4248) is the
+; select's FALSE value. The constant is selected exactly when %arg does not
+; compare equal to it, so %arg cannot be substituted for it.
+; The assertions below show the constant 0x4248 used by both the compare and
+; v_cndmask.
+define half @fcmp_select_no_fold_f16_other_pos(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f16_other_pos:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_movk_i32 s4, 0x4248
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
+; GFX900-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f16_other_pos:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x4248, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq half %arg, 0xH4248
+ %sel = select i1 %cmp, half %other, half 0xH4248
+ ret half %sel
+}
+
+; Should NOT be folded: f16 compare uses the olt predicate, which is not an
+; equality test, so a true/false result does not imply %arg equals the shared
+; constant (0xH4248).
+; The assertions below show the constant 0x4248 used by both the compare
+; (emitted as v_cmp_gt with the constant as first source) and v_cndmask.
+define half @fcmp_select_no_fold_f16_unsupported_cmp(half %arg, half %other) {
+; GFX900-LABEL: fcmp_select_no_fold_f16_unsupported_cmp:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_movk_i32 s4, 0x4248
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
+; GFX900-NEXT: v_cmp_gt_f16_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_f16_unsupported_cmp:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0x4248, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp olt half %arg, 0xH4248
+ %sel = select i1 %cmp, half %other, half 0xH4248
+ ret half %sel
+}
+
+;------------------------------------------------------------------------------
+; BF16 Tests
+;------------------------------------------------------------------------------
+
+; Should be folded: bfloat fcmp oeq + select with the same constant (0xR4248,
+; i.e. 50.0) as the select's true value.
+; In the assertions below the compare is done in f32: %arg is shifted left by
+; 16 into v2 and compared against 0x42480000. The original %arg stays in v0 and
+; v_cndmask picks it when the compare is true, so the bfloat constant is never
+; materialized for the select.
+define bfloat @fcmp_select_fold_oeq_bf16_imm(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_fold_oeq_bf16_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: s_mov_b32 s4, 0x42480000
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_oeq_bf16_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq bfloat %arg, 0xR4248
+ %sel = select i1 %cmp, bfloat 0xR4248, bfloat %other
+ ret bfloat %sel
+}
+
+; Should be folded: bfloat fcmp oeq + select with the same constant (0xR4248)
+; as the select's true value; commuted variant with the constant as the FIRST
+; fcmp operand.
+; In the assertions below the compare is done in f32 on a shifted copy of %arg
+; (v2), while v_cndmask picks the original %arg (v0) when the compare is true.
+define bfloat @fcmp_select_fold_oeq_imm_bf16(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_fold_oeq_imm_bf16:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: s_mov_b32 s4, 0x42480000
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_oeq_imm_bf16:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v2
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq bfloat 0xR4248, %arg
+ %sel = select i1 %cmp, bfloat 0xR4248, bfloat %other
+ ret bfloat %sel
+}
+
+; Should be folded: bfloat fcmp one + select with the same constant (0xR4020,
+; i.e. 2.5) as the select's false value.
+; In the assertions below the compare is done in f32: %arg is shifted left by
+; 16 into v2 and compared against 0x40200000. The original %arg stays in v0 and
+; v_cndmask falls back to it when the compare is false, so the bfloat constant
+; is never materialized for the select.
+define bfloat @fcmp_select_fold_one_bf16_imm(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_fold_one_bf16_imm:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: s_mov_b32 s4, 0x40200000
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_one_bf16_imm:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one bfloat %arg, 0xR4020
+ %sel = select i1 %cmp, bfloat %other, bfloat 0xR4020
+ ret bfloat %sel
+}
+
+; Should be folded: bfloat fcmp one + select with the same constant (0xR4020)
+; as the select's false value; commuted variant with the constant as the FIRST
+; fcmp operand.
+; In the assertions below the compare is done in f32 on a shifted copy of %arg
+; (v2), while v_cndmask falls back to the original %arg (v0) when the compare
+; is false.
+define bfloat @fcmp_select_fold_one_imm_bf16(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_fold_one_imm_bf16:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: s_mov_b32 s4, 0x40200000
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v2
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_fold_one_imm_bf16:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x40200000, v2
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one bfloat 0xR4020, %arg
+ %sel = select i1 %cmp, bfloat %other, bfloat 0xR4020
+ ret bfloat %sel
+}
+
+; Should NOT be folded: the compare constant (0xR4248) and the select constant
+; (0xR4300) differ, so %arg cannot stand in for the selected value.
+; The assertions below show both constants being used: 0x42480000 by the f32
+; compare on the shifted %arg, and 0x4300 by v_cndmask.
+define bfloat @fcmp_select_no_fold_bf16_different_const(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_no_fold_bf16_different_const:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX900-NEXT: s_mov_b32 s4, 0x42480000
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x4300
+; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_bf16_different_const:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x42480000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4300, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq bfloat %arg, 0xR4248
+ %sel = select i1 %cmp, bfloat 0xR4300, bfloat %other
+ ret bfloat %sel
+}
+
+; Should NOT be folded: bfloat fcmp oeq against a NaN constant (0xR7FC0).
+; An ordered compare against NaN is always false, and the assertions below
+; contain no compare instruction at all: the function simply returns %other
+; (v1 copied to v0).
+define bfloat @fcmp_select_no_fold_nan_bf16(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_no_fold_nan_bf16:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v0, v1
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq bfloat %arg, 0xR7FC0
+ %sel = select i1 %cmp, bfloat 0xR7FC0, bfloat %other
+ ret bfloat %sel
+}
+
+; Should NOT be folded: bfloat fcmp one against a NaN constant (0xR7FC0), with
+; the same NaN as the select's false value.
+; An ordered compare against NaN is always false, and the assertions below
+; contain no compare instruction at all: the function simply returns the
+; constant 0x7fc0.
+define bfloat @fcmp_select_no_fold_nan_bf16_one(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_no_fold_nan_bf16_one:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0x7fc0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_nan_bf16_one:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_mov_b32_e32 v0, 0x7fc0
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one bfloat %arg, 0xR7FC0
+ %sel = select i1 %cmp, bfloat %other, bfloat 0xR7FC0
+ ret bfloat %sel
+}
+
+; Should NOT be folded: bfloat fcmp one against +Inf (0xR7F80), with the same
+; +Inf constant as the select's false value.
+; The assertions below show unfolded code: the f32 compare on the shifted %arg
+; uses 0x7f800000, and v_cndmask separately uses the bfloat constant 0x7f80
+; instead of reusing %arg.
+define bfloat @fcmp_select_no_fold_posinf_bf16_one(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_no_fold_posinf_bf16_one:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX900-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f80
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_posinf_bf16_one:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x7f800000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x7f80, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one bfloat %arg, 0xR7F80
+ %sel = select i1 %cmp, bfloat %other, bfloat 0xR7F80
+ ret bfloat %sel
+}
+
+; Should NOT be folded: bfloat fcmp one against -Inf (0xRFF80), with the same
+; -Inf constant as the select's false value.
+; The assertions below show unfolded code: the f32 compare on the shifted %arg
+; uses 0xff800000, and v_cndmask separately uses the bfloat constant, which
+; appears as 0xffffff80, instead of reusing %arg.
+define bfloat @fcmp_select_no_fold_neginf_bf16_one(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_no_fold_neginf_bf16_one:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX900-NEXT: s_mov_b32 s4, 0xff800000
+; GFX900-NEXT: v_mov_b32_e32 v2, 0xffffff80
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_neginf_bf16_one:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0xff800000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffffff80, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one bfloat %arg, 0xRFF80
+ %sel = select i1 %cmp, bfloat %other, bfloat 0xRFF80
+ ret bfloat %sel
+}
+
+; Should NOT be folded: bfloat fcmp oeq against +0.0 (0xR0000), with the same
+; zero constant as the select's true value.
+; In the assertions below, 0 is written directly as an operand of both the f32
+; compare and v_cndmask, so no register is spent on the constant and %arg is
+; not reused by the select.
+define bfloat @fcmp_select_no_fold_oeq_bf16_zero(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_no_fold_oeq_bf16_zero:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX900-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_oeq_bf16_zero:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1010-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq bfloat %arg, 0xR0000
+ %sel = select i1 %cmp, bfloat 0xR0000, bfloat %other
+ ret bfloat %sel
+}
+
+; Should NOT be folded: bfloat fcmp one against -0.0 (0xR8000), with the same
+; constant as the select's false value. The constant is the FIRST fcmp operand
+; here (commuted form).
+; The assertions below show unfolded code: the f32 compare on the shifted %arg
+; uses 0x80000000 (built with s_brev_b32 on gfx900), and v_cndmask separately
+; uses the bfloat constant, which appears as 0xffff8000, instead of reusing
+; %arg.
+define bfloat @fcmp_select_no_fold_one_bf16_negzero(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_no_fold_one_bf16_negzero:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX900-NEXT: s_brev_b32 s4, 1
+; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX900-NEXT: v_cmp_lg_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_one_bf16_negzero:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1010-NEXT: v_cmp_lg_f32_e32 vcc_lo, 0x80000000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0xffff8000, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp one bfloat 0xR8000, %arg
+ %sel = select i1 %cmp, bfloat %other, bfloat 0xR8000
+ ret bfloat %sel
+}
+
+; Should NOT be folded: bfloat fcmp oeq where the shared constant (0xR4248) is
+; the select's FALSE value. The constant is selected exactly when %arg does not
+; compare equal to it, so %arg cannot be substituted for it.
+; The assertions below show the f32 compare using 0x42480000 and v_cndmask
+; separately using the bfloat constant 0x4248.
+define bfloat @fcmp_select_no_fold_bf16_other_pos(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_no_fold_bf16_other_pos:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX900-NEXT: s_mov_b32 s4, 0x42480000
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
+; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_bf16_other_pos:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1010-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42480000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp oeq bfloat %arg, 0xR4248
+ %sel = select i1 %cmp, bfloat %other, bfloat 0xR4248
+ ret bfloat %sel
+}
+
+; Should NOT be folded: bfloat compare uses the olt predicate, which is not an
+; equality test, so a true/false result does not imply %arg equals the shared
+; constant (0xR4248).
+; The assertions below show the f32 compare (emitted as v_cmp_gt with the
+; constant 0x42480000 as first source) and v_cndmask separately using the
+; bfloat constant 0x4248.
+define bfloat @fcmp_select_no_fold_bf16_unsupported_cmp(bfloat %arg, bfloat %other) {
+; GFX900-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX900-NEXT: s_mov_b32 s4, 0x42480000
+; GFX900-NEXT: v_mov_b32_e32 v2, 0x4248
+; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1010-LABEL: fcmp_select_no_fold_bf16_unsupported_cmp:
+; GFX1010: ; %bb.0: ; %entry
+; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1010-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x42480000, v0
+; GFX1010-NEXT: v_cndmask_b32_e32 v0, 0x4248, v1, vcc_lo
+; GFX1010-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %cmp = fcmp olt bfloat %arg, 0xR4248
+ %sel = select i1 %cmp, bfloat %other, bfloat 0xR4248
+ ret bfloat %sel
+}