diff options
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/ieee754/dbl-64/s_fma.c | 36 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/s_fmal.c | 38 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/s_fmal.c | 36 |
3 files changed, 98 insertions, 12 deletions
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index 155a773..68c63a1 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -56,16 +56,44 @@ __fma (double x, double y, double z) if (z == 0 && x != 0 && y != 0) return x * y; /* If x or y or z is Inf/NaN, or if fma will certainly overflow, - or if x * y is less than half of DBL_DENORM_MIN, - compute as x * y + z. */ + or if x * y is zero, compute as x * y + z. */ if (u.ieee.exponent == 0x7ff || v.ieee.exponent == 0x7ff || w.ieee.exponent == 0x7ff || u.ieee.exponent + v.ieee.exponent > 0x7ff + IEEE754_DOUBLE_BIAS - || u.ieee.exponent + v.ieee.exponent - < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2) + || x == 0 + || y == 0) return x * y + z; + /* If x * y is less than 1/4 of DBL_DENORM_MIN, neither the + result nor whether there is underflow depends on its exact + value, only on its sign. */ + if (u.ieee.exponent + v.ieee.exponent + < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2) + { + int neg = u.ieee.negative ^ v.ieee.negative; + double tiny = neg ? -0x1p-1074 : 0x1p-1074; + if (w.ieee.exponent >= 3) + return tiny + z; + /* Scaling up, adding TINY and scaling down produces the + correct result, because in round-to-nearest mode adding + TINY has no effect and in other modes double rounding is + harmless. But it may not produce required underflow + exceptions. */ + v.d = z * 0x1p54 + tiny; + if (TININESS_AFTER_ROUNDING + ? v.ieee.exponent < 55 + : (w.ieee.exponent == 0 + || (w.ieee.exponent == 1 + && w.ieee.negative != neg + && w.ieee.mantissa1 == 0 + && w.ieee.mantissa0 == 0))) + { + volatile double force_underflow = x * y; + (void) force_underflow; + } + return v.d * 0x1p-54; + } if (u.ieee.exponent + v.ieee.exponent >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG) { diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c index 201bff9..c6a3d71 100644 --- a/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -57,16 +57,46 @@ __fmal (long double x, long double y, long double z) if (z == 0 && x != 0 && y != 0) return x * y; /* If x or y or z is Inf/NaN, or if fma will certainly overflow, - or if x * y is less than half of LDBL_DENORM_MIN, - compute as x * y + z. */ + or if x * y is zero, compute as x * y + z. */ if (u.ieee.exponent == 0x7fff || v.ieee.exponent == 0x7fff || w.ieee.exponent == 0x7fff || u.ieee.exponent + v.ieee.exponent > 0x7fff + IEEE854_LONG_DOUBLE_BIAS - || u.ieee.exponent + v.ieee.exponent - < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + || x == 0 + || y == 0) return x * y + z; + /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the + result nor whether there is underflow depends on its exact + value, only on its sign. */ + if (u.ieee.exponent + v.ieee.exponent + < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + { + int neg = u.ieee.negative ^ v.ieee.negative; + long double tiny = neg ? -0x1p-16494L : 0x1p-16494L; + if (w.ieee.exponent >= 3) + return tiny + z; + /* Scaling up, adding TINY and scaling down produces the + correct result, because in round-to-nearest mode adding + TINY has no effect and in other modes double rounding is + harmless. But it may not produce required underflow + exceptions. */ + v.d = z * 0x1p114L + tiny; + if (TININESS_AFTER_ROUNDING + ? v.ieee.exponent < 115 + : (w.ieee.exponent == 0 + || (w.ieee.exponent == 1 + && w.ieee.negative != neg + && w.ieee.mantissa3 == 0 + && w.ieee.mantissa2 == 0 + && w.ieee.mantissa1 == 0 + && w.ieee.mantissa0 == 0))) + { + volatile long double force_underflow = x * y; + (void) force_underflow; + } + return v.d * 0x1p-114L; + } if (u.ieee.exponent + v.ieee.exponent >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) { diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c index 91333af..b5fdcba 100644 --- a/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -57,16 +57,44 @@ __fmal (long double x, long double y, long double z) if (z == 0 && x != 0 && y != 0) return x * y; /* If x or y or z is Inf/NaN, or if fma will certainly overflow, - or if x * y is less than half of LDBL_DENORM_MIN, - compute as x * y + z. */ + or if x * y is zero, compute as x * y + z. */ if (u.ieee.exponent == 0x7fff || v.ieee.exponent == 0x7fff || w.ieee.exponent == 0x7fff || u.ieee.exponent + v.ieee.exponent > 0x7fff + IEEE854_LONG_DOUBLE_BIAS - || u.ieee.exponent + v.ieee.exponent - < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + || x == 0 + || y == 0) return x * y + z; + /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the + result nor whether there is underflow depends on its exact + value, only on its sign. */ + if (u.ieee.exponent + v.ieee.exponent + < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + { + int neg = u.ieee.negative ^ v.ieee.negative; + long double tiny = neg ? -0x1p-16445L : 0x1p-16445L; + if (w.ieee.exponent >= 3) + return tiny + z; + /* Scaling up, adding TINY and scaling down produces the + correct result, because in round-to-nearest mode adding + TINY has no effect and in other modes double rounding is + harmless. But it may not produce required underflow + exceptions. */ + v.d = z * 0x1p65L + tiny; + if (TININESS_AFTER_ROUNDING + ? v.ieee.exponent < 66 + : (w.ieee.exponent == 0 + || (w.ieee.exponent == 1 + && w.ieee.negative != neg + && w.ieee.mantissa1 == 0 + && w.ieee.mantissa0 == 0x80000000))) + { + volatile long double force_underflow = x * y; + (void) force_underflow; + } + return v.d * 0x1p-65L; + } if (u.ieee.exponent + v.ieee.exponent >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) { |