diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2023-02-06 14:56:39 -0400 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2023-07-11 18:30:15 -0400 |
| commit | b59022b42e0b2c3f5f183306114b398c7eda7e92 (patch) | |
| tree | 16d72355dd404b133104e47a41f5dd0501857dca | |
| parent | a709c49d75c62ecce9e5598994692546dfd1e02b (diff) | |
| download | llvm-b59022b42e0b2c3f5f183306114b398c7eda7e92.zip llvm-b59022b42e0b2c3f5f183306114b398c7eda7e92.tar.gz llvm-b59022b42e0b2c3f5f183306114b398c7eda7e92.tar.bz2 | |
DAG: Handle lowering of unordered fcZero|fcSubnormal to fcmp
| -rw-r--r-- | llvm/lib/CodeGen/CodeGenCommonISel.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 44 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll | 45 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/is_fpclass.ll | 190 |
4 files changed, 114 insertions, 167 deletions
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp index 62613c0..577c5db 100644 --- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -196,7 +196,9 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) { case fcFinite: case fcPosFinite: case fcNegFinite: + case fcZero | fcNan: case fcSubnormal | fcZero: + case fcSubnormal | fcZero | fcNan: return InvertedTest; default: return fcNone; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4168006..e2903ef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8056,18 +8056,28 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, return SDValue(); } -/// If this FPClassTest can be performed with a fcmp to 0, return the test mask -/// for the floating-point mode. -static bool isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, - const MachineFunction &MF) { - // TODO: Handle unordered compares - if (Test == fcZero && +/// Returns a true value if this FPClassTest can be performed with an ordered +/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns +/// std::nullopt if it cannot be performed as a compare with 0. +static std::optional<bool> isFCmpEqualZero(FPClassTest Test, + const fltSemantics &Semantics, + const MachineFunction &MF) { + FPClassTest OrderedMask = Test & ~fcNan; + FPClassTest NanTest = Test & fcNan; + bool IsOrdered = NanTest == fcNone; + bool IsUnordered = NanTest == fcNan; + + // Skip cases that are testing for only a qnan or snan. 
+ if (!IsOrdered && !IsUnordered) + return std::nullopt; + + if (OrderedMask == fcZero && MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE) - return true; - if (Test == (fcZero | fcSubnormal) && + return IsOrdered; + if (OrderedMask == (fcZero | fcSubnormal) && MF.getDenormalMode(Semantics).inputsAreZero()) - return true; - return false; + return IsOrdered; + return std::nullopt; } SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, @@ -8109,14 +8119,20 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, // exceptions are ignored. if (Flags.hasNoFPExcept() && isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) { - if (isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction()) && - (isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ, - OperandVT.getScalarType().getSimpleVT()))) { + ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ; + ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ; + + if (std::optional<bool> IsCmp0 = + isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction()); + IsCmp0 && (isCondCodeLegalOrCustom( + *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode, + OperandVT.getScalarType().getSimpleVT()))) { + // If denormals could be implicitly treated as 0, this is not equivalent // to a compare with 0 since it will also be true for denormals. return DAG.getSetCC(DL, ResultVT, Op, DAG.getConstantFP(0.0, DL, OperandVT), - IsInverted ? ISD::SETUNE : ISD::SETOEQ); + *IsCmp0 ? 
OrderedCmpOpcode : UnorderedCmpOpcode); } if (Test == fcNan && diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index aa7f6bf..9baf7fd 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -2545,18 +2545,11 @@ define i1 @not_iszero_or_nan_f16(half %x) { ; GFX7SELDAG: ; %bb.0: ; %entry ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c01 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 -; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 -; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2619,18 +2612,11 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 { ; GFX7SELDAG: ; %bb.0: ; %entry ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c01 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 -; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], 
s[4:5], vcc -; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 -; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2693,18 +2679,11 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 { ; GFX7SELDAG: ; %bb.0: ; %entry ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 -; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 +; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c01 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 -; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 -; GFX7SELDAG-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 -; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 -; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7SELDAG-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 -; GFX7SELDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 +; GFX7SELDAG-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; GFX7SELDAG-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll index 09ad6ad..47cf85d 100644 --- a/llvm/test/CodeGen/X86/is_fpclass.ll +++ b/llvm/test/CodeGen/X86/is_fpclass.ll @@ -1586,24 +1586,20 @@ entry: define i1 @iszero_or_nan_f(float %x) { ; CHECK-32-LABEL: iszero_or_nan_f: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; 
CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: testl %eax, %eax +; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-32-NEXT: fldz +; CHECK-32-NEXT: fucompp +; CHECK-32-NEXT: fnstsw %ax +; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-32-NEXT: sahf ; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: orb %cl, %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: iszero_or_nan_f: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: testl %eax, %eax +; CHECK-64-NEXT: xorps %xmm1, %xmm1 +; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 ; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: orb %cl, %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 99) ; 0x60|0x3 = "zero|nan" @@ -1667,34 +1663,20 @@ entry: define i1 @not_iszero_or_nan_f(float %x) { ; CHECK-32-LABEL: not_iszero_or_nan_f: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: leal -1(%eax), %edx -; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %dl -; CHECK-32-NEXT: orb %cl, %dl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %dl, %al +; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-32-NEXT: fldz +; CHECK-32-NEXT: fucompp +; CHECK-32-NEXT: fnstsw %ax +; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-32-NEXT: sahf +; CHECK-32-NEXT: setne %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_iszero_or_nan_f: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF 
-; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: leal -1(%rax), %edx -; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %dl -; CHECK-64-NEXT: orb %cl, %dl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %dl, %al +; CHECK-64-NEXT: xorps %xmm1, %xmm1 +; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: setne %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)" @@ -1706,32 +1688,22 @@ define i1 @not_iszero_or_nan_f_daz(float %x) #0 { ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: leal -1(%eax), %edx -; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %dl -; CHECK-32-NEXT: orb %cl, %dl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %dl, %al +; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; CHECK-32-NEXT: setl %cl +; CHECK-32-NEXT: testl %eax, %eax +; CHECK-32-NEXT: setne %al +; CHECK-32-NEXT: andb %cl, %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_iszero_or_nan_f_daz: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax ; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: leal -1(%rax), %edx -; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %dl -; CHECK-64-NEXT: orb %cl, %dl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: 
setb %al -; CHECK-64-NEXT: orb %dl, %al +; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; CHECK-64-NEXT: setl %cl +; CHECK-64-NEXT: testl %eax, %eax +; CHECK-64-NEXT: setne %al +; CHECK-64-NEXT: andb %cl, %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)" @@ -1743,32 +1715,22 @@ define i1 @not_iszero_or_nan_f_maybe_daz(float %x) #1 { ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: leal -1(%eax), %edx -; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %dl -; CHECK-32-NEXT: orb %cl, %dl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %dl, %al +; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; CHECK-32-NEXT: setl %cl +; CHECK-32-NEXT: testl %eax, %eax +; CHECK-32-NEXT: setne %al +; CHECK-32-NEXT: andb %cl, %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_iszero_or_nan_f_maybe_daz: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: movd %xmm0, %eax ; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: leal -1(%rax), %edx -; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %dl -; CHECK-64-NEXT: orb %cl, %dl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %dl, %al +; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; CHECK-64-NEXT: setl %cl +; CHECK-64-NEXT: testl %eax, %eax +; CHECK-64-NEXT: setne %al +; CHECK-64-NEXT: andb %cl, %al ; CHECK-64-NEXT: retq entry: %0 = tail call i1 
@llvm.is.fpclass.f32(float %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)" @@ -2441,24 +2403,20 @@ define i1 @issubnormal_or_zero_or_nan_f(float %x) { define i1 @issubnormal_or_zero_or_nan_f_daz(float %x) #0 { ; CHECK-32-LABEL: issubnormal_or_zero_or_nan_f_daz: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-32-NEXT: fldz +; CHECK-32-NEXT: fucompp +; CHECK-32-NEXT: fnstsw %ax +; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-32-NEXT: sahf +; CHECK-32-NEXT: sete %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: issubnormal_or_zero_or_nan_f_daz: ; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: xorps %xmm1, %xmm1 +; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: sete %al ; CHECK-64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243) ; 0xf0|0x3 = "subnormal|zero|nan" ret i1 %class @@ -2561,26 +2519,24 @@ define i1 @not_issubnormal_or_nan_f(float %x) { define i1 @not_issubnormal_or_zero_or_nan_f(float %x) { ; CHECK-32-LABEL: not_issubnormal_or_zero_or_nan_f: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; 
CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-32-NEXT: setne %cl +; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; CHECK-32-NEXT: setl %al +; CHECK-32-NEXT: andb %cl, %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: not_issubnormal_or_zero_or_nan_f: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movd %xmm0, %eax +; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; CHECK-64-NEXT: setne %cl ; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; CHECK-64-NEXT: setl %al +; CHECK-64-NEXT: andb %cl, %al ; CHECK-64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 780) ; ~(0xf0|0x3) = ~"subnormal|zero|nan" ret i1 %class @@ -2589,26 +2545,20 @@ define i1 @not_issubnormal_or_zero_or_nan_f(float %x) { define i1 @not_issubnormal_or_zero_or_nan_f_daz(float %x) #0 { ; CHECK-32-LABEL: not_issubnormal_or_zero_or_nan_f_daz: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al +; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-32-NEXT: fldz +; CHECK-32-NEXT: fucompp +; CHECK-32-NEXT: fnstsw %ax +; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-32-NEXT: sahf +; CHECK-32-NEXT: setne %al ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: 
not_issubnormal_or_zero_or_nan_f_daz: ; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al +; CHECK-64-NEXT: xorps %xmm1, %xmm1 +; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-64-NEXT: setne %al ; CHECK-64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 780) ; ~(0xf0|0x3) = ~"subnormal|zero|nan" ret i1 %class |
