aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/ieee754/dbl-64/s_fma.c36
-rw-r--r--sysdeps/ieee754/ldbl-128/s_fmal.c38
-rw-r--r--sysdeps/ieee754/ldbl-96/s_fmal.c36
3 files changed, 98 insertions, 12 deletions
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index 155a773..68c63a1 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -56,16 +56,44 @@ __fma (double x, double y, double z)
if (z == 0 && x != 0 && y != 0)
return x * y;
/* If x or y or z is Inf/NaN, or if fma will certainly overflow,
- or if x * y is less than half of DBL_DENORM_MIN,
- compute as x * y + z. */
+ or if x * y is zero, compute as x * y + z. */
if (u.ieee.exponent == 0x7ff
|| v.ieee.exponent == 0x7ff
|| w.ieee.exponent == 0x7ff
|| u.ieee.exponent + v.ieee.exponent
> 0x7ff + IEEE754_DOUBLE_BIAS
- || u.ieee.exponent + v.ieee.exponent
- < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
+ || x == 0
+ || y == 0)
return x * y + z;
+ /* If x * y is less than 1/4 of DBL_DENORM_MIN, neither the
+ result nor whether there is underflow depends on its exact
+ value, only on its sign. */
+ if (u.ieee.exponent + v.ieee.exponent
+ < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
+ {
+ int neg = u.ieee.negative ^ v.ieee.negative;
+ double tiny = neg ? -0x1p-1074 : 0x1p-1074;
+ if (w.ieee.exponent >= 3)
+ return tiny + z;
+ /* Scaling up, adding TINY and scaling down produces the
+ correct result, because in round-to-nearest mode adding
+ TINY has no effect and in other modes double rounding is
+ harmless. But it may not produce required underflow
+ exceptions. */
+ v.d = z * 0x1p54 + tiny;
+ if (TININESS_AFTER_ROUNDING
+ ? v.ieee.exponent < 55
+ : (w.ieee.exponent == 0
+ || (w.ieee.exponent == 1
+ && w.ieee.negative != neg
+ && w.ieee.mantissa1 == 0
+ && w.ieee.mantissa0 == 0)))
+ {
+ volatile double force_underflow = x * y;
+ (void) force_underflow;
+ }
+ return v.d * 0x1p-54;
+ }
if (u.ieee.exponent + v.ieee.exponent
>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
{
diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
index 201bff9..c6a3d71 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -57,16 +57,46 @@ __fmal (long double x, long double y, long double z)
if (z == 0 && x != 0 && y != 0)
return x * y;
/* If x or y or z is Inf/NaN, or if fma will certainly overflow,
- or if x * y is less than half of LDBL_DENORM_MIN,
- compute as x * y + z. */
+ or if x * y is zero, compute as x * y + z. */
if (u.ieee.exponent == 0x7fff
|| v.ieee.exponent == 0x7fff
|| w.ieee.exponent == 0x7fff
|| u.ieee.exponent + v.ieee.exponent
> 0x7fff + IEEE854_LONG_DOUBLE_BIAS
- || u.ieee.exponent + v.ieee.exponent
- < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
+ || x == 0
+ || y == 0)
return x * y + z;
+ /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the
+ result nor whether there is underflow depends on its exact
+ value, only on its sign. */
+ if (u.ieee.exponent + v.ieee.exponent
+ < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
+ {
+ int neg = u.ieee.negative ^ v.ieee.negative;
+ long double tiny = neg ? -0x1p-16494L : 0x1p-16494L;
+ if (w.ieee.exponent >= 3)
+ return tiny + z;
+ /* Scaling up, adding TINY and scaling down produces the
+ correct result, because in round-to-nearest mode adding
+ TINY has no effect and in other modes double rounding is
+ harmless. But it may not produce required underflow
+ exceptions. */
+ v.d = z * 0x1p114L + tiny;
+ if (TININESS_AFTER_ROUNDING
+ ? v.ieee.exponent < 115
+ : (w.ieee.exponent == 0
+ || (w.ieee.exponent == 1
+ && w.ieee.negative != neg
+ && w.ieee.mantissa3 == 0
+ && w.ieee.mantissa2 == 0
+ && w.ieee.mantissa1 == 0
+ && w.ieee.mantissa0 == 0)))
+ {
+ volatile long double force_underflow = x * y;
+ (void) force_underflow;
+ }
+ return v.d * 0x1p-114L;
+ }
if (u.ieee.exponent + v.ieee.exponent
>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
{
diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c
index 91333af..b5fdcba 100644
--- a/sysdeps/ieee754/ldbl-96/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmal.c
@@ -57,16 +57,44 @@ __fmal (long double x, long double y, long double z)
if (z == 0 && x != 0 && y != 0)
return x * y;
/* If x or y or z is Inf/NaN, or if fma will certainly overflow,
- or if x * y is less than half of LDBL_DENORM_MIN,
- compute as x * y + z. */
+ or if x * y is zero, compute as x * y + z. */
if (u.ieee.exponent == 0x7fff
|| v.ieee.exponent == 0x7fff
|| w.ieee.exponent == 0x7fff
|| u.ieee.exponent + v.ieee.exponent
> 0x7fff + IEEE854_LONG_DOUBLE_BIAS
- || u.ieee.exponent + v.ieee.exponent
- < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
+ || x == 0
+ || y == 0)
return x * y + z;
+ /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the
+ result nor whether there is underflow depends on its exact
+ value, only on its sign. */
+ if (u.ieee.exponent + v.ieee.exponent
+ < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
+ {
+ int neg = u.ieee.negative ^ v.ieee.negative;
+ long double tiny = neg ? -0x1p-16445L : 0x1p-16445L;
+ if (w.ieee.exponent >= 3)
+ return tiny + z;
+ /* Scaling up, adding TINY and scaling down produces the
+ correct result, because in round-to-nearest mode adding
+ TINY has no effect and in other modes double rounding is
+ harmless. But it may not produce required underflow
+ exceptions. */
+ v.d = z * 0x1p65L + tiny;
+ if (TININESS_AFTER_ROUNDING
+ ? v.ieee.exponent < 66
+ : (w.ieee.exponent == 0
+ || (w.ieee.exponent == 1
+ && w.ieee.negative != neg
+ && w.ieee.mantissa1 == 0
+ && w.ieee.mantissa0 == 0x80000000)))
+ {
+ volatile long double force_underflow = x * y;
+ (void) force_underflow;
+ }
+ return v.d * 0x1p-65L;
+ }
if (u.ieee.exponent + v.ieee.exponent
>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
{