diff options
author | Serge Pavlov <sepavloff@gmail.com> | 2020-02-27 18:16:14 +0700 |
---|---|---|
committer | Serge Pavlov <sepavloff@gmail.com> | 2020-03-11 10:38:46 +0700 |
commit | 14a1b80e044aac1947c891525cf30521be0a79b7 (patch) | |
tree | 747231d13fe6fad8909535d57e4d732adf9a3336 /llvm/lib/Support/APFloat.cpp | |
parent | c0ad75e7587d2e08ba2d5c55311b44ce7f9b20e4 (diff) | |
download | llvm-14a1b80e044aac1947c891525cf30521be0a79b7.zip llvm-14a1b80e044aac1947c891525cf30521be0a79b7.tar.gz llvm-14a1b80e044aac1947c891525cf30521be0a79b7.tar.bz2 |
Make IEEEFloat::roundToIntegral more standard conformant
Behavior of IEEEFloat::roundToIntegral is aligned with IEEE-754
operation roundToIntegralExact. In particular, this function now:
- returns opInvalidOp for signaling NaNs,
- returns opInexact if the result of rounding differs from argument.
Differential Revision: https://reviews.llvm.org/D75246
Diffstat (limited to 'llvm/lib/Support/APFloat.cpp')
-rw-r--r-- | llvm/lib/Support/APFloat.cpp | 62 |
1 files changed, 53 insertions, 9 deletions
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 5379d29..49f9cf8a 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -1977,14 +1977,59 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, return fs; } -/* Rounding-mode corrrect round to integral value. */ +/* Rounding-mode correct round to integral value. */ IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { opStatus fs; + if (isInfinity()) + // [IEEE Std 754-2008 6.1]: + // The behavior of infinity in floating-point arithmetic is derived from the + // limiting cases of real arithmetic with operands of arbitrarily + // large magnitude, when such a limit exists. + // ... + // Operations on infinite operands are usually exact and therefore signal no + // exceptions ... + return opOK; + + if (isNaN()) { + if (isSignaling()) { + // [IEEE Std 754-2008 6.2]: + // Under default exception handling, any operation signaling an invalid + // operation exception and for which a floating-point result is to be + // delivered shall deliver a quiet NaN. + makeQuiet(); + // [IEEE Std 754-2008 6.2]: + // Signaling NaNs shall be reserved operands that, under default exception + // handling, signal the invalid operation exception(see 7.2) for every + // general-computational and signaling-computational operation except for + // the conversions described in 5.12. + return opInvalidOp; + } else { + // [IEEE Std 754-2008 6.2]: + // For an operation with quiet NaN inputs, other than maximum and minimum + // operations, if a floating-point result is to be delivered the result + // shall be a quiet NaN which should be one of the input NaNs. + // ... + // Every general-computational and quiet-computational operation involving + // one or more input NaNs, none of them signaling, shall signal no + // exception, except fusedMultiplyAdd might signal the invalid operation + // exception(see 7.2). 
+ return opOK; + } + } + + if (isZero()) { + // [IEEE Std 754-2008 6.3]: + // ... the sign of the result of conversions, the quantize operation, the + // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is + // the sign of the first or only operand. + return opOK; + } + // If the exponent is large enough, we know that this value is already // integral, and the arithmetic below would potentially cause it to saturate // to +/-Inf. Bail out early instead. - if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) + if (exponent+1 >= (int)semanticsPrecision(*semantics)) return opOK; // The algorithm here is quite simple: we add 2^(p-1), where p is the @@ -1998,19 +2043,18 @@ IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { IEEEFloat MagicConstant(*semantics); fs = MagicConstant.convertFromAPInt(IntegerConstant, false, rmNearestTiesToEven); + assert(fs == opOK); MagicConstant.sign = sign; - if (fs != opOK) - return fs; - - // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. + // Preserve the input sign so that we can handle the case of zero result + // correctly. bool inputSign = isNegative(); fs = add(MagicConstant, rounding_mode); - if (fs != opOK && fs != opInexact) - return fs; - fs = subtract(MagicConstant, rounding_mode); + // Current value and 'MagicConstant' are both integers, so the result of the + // subtraction is always exact according to Sterbenz' lemma. + subtract(MagicConstant, rounding_mode); // Restore the input sign. if (inputSign != isNegative()) |