aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoseph Myers <joseph@codesourcery.com>2012-10-30 13:54:50 +0000
committerJoseph Myers <joseph@codesourcery.com>2012-10-30 13:54:50 +0000
commit8627a2329c5e6fc09bf9f0f070a21aca41f2b122 (patch)
treef1e7aa19f3fe1a9b3f9e403ced782c6f7d612f66
parent2a27fd6dae3edec949deda9a55928a0e22c8a8ae (diff)
downloadglibc-8627a2329c5e6fc09bf9f0f070a21aca41f2b122.zip
glibc-8627a2329c5e6fc09bf9f0f070a21aca41f2b122.tar.gz
glibc-8627a2329c5e6fc09bf9f0f070a21aca41f2b122.tar.bz2
Fix fma missing underflows and bad results for some subnormal results (bugs 14152, 14783).
-rw-r--r--ChangeLog13
-rw-r--r--NEWS14
-rw-r--r--math/libm-test.inc65
-rw-r--r--sysdeps/ieee754/dbl-64/s_fma.c22
-rw-r--r--sysdeps/ieee754/ldbl-128/s_fmal.c22
-rw-r--r--sysdeps/ieee754/ldbl-96/s_fmal.c22
6 files changed, 105 insertions, 53 deletions
diff --git a/ChangeLog b/ChangeLog
index 067337a..912167b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
2012-10-30 Joseph Myers <joseph@codesourcery.com>
+ [BZ #14152]
+ [BZ #14783]
+ * sysdeps/ieee754/dbl-64/s_fma.c (__fma): Extract low bits of
+ result and shift together with sticky bit instead of replicating
+ round-to-nearest rounding.
+ * sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise.
+ * sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise.
+ * math/libm-test.inc (fma_test): Add more tests. Do not permit
+ missing underflow exceptions.
+ (fma_test_towardzero): Add more tests.
+ (fma_test_downward): Likewise.
+ (fma_test_upward): Likewise.
+
[BZ #14047]
* sysdeps/generic/tininess.h: New file.
* sysdeps/i386/tininess.h: Likewise.
diff --git a/NEWS b/NEWS
index e4e6726..b54365f 100644
--- a/NEWS
+++ b/NEWS
@@ -11,13 +11,13 @@ Version 2.17
1349, 3479, 5044, 5298, 5400, 6530, 6778, 6808, 9685, 9914, 10014, 10038,
10631, 11438, 11607, 12140, 13412, 13542, 13601, 13629, 13679, 13696,
- 13717, 13741, 13939, 13966, 14042, 14047, 14090, 14150, 14151, 14154,
- 14157, 14166, 14173, 14195, 14237, 14251, 14252, 14283, 14298, 14303,
- 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417, 14459,
- 14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532, 14538,
- 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583, 14587,
- 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683, 14694,
- 14716, 14743, 14767.
+ 13717, 13741, 13939, 13966, 14042, 14047, 14090, 14150, 14151, 14152,
+ 14154, 14157, 14166, 14173, 14195, 14237, 14251, 14252, 14283, 14298,
+ 14303, 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417,
+ 14459, 14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532,
+ 14538, 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583,
+ 14587, 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683,
+ 14694, 14716, 14743, 14767, 14783.
* Support for STT_GNU_IFUNC symbols added for s390 and s390x.
Optimized versions of memcpy, memset, and memcmp added for System z10 and
diff --git a/math/libm-test.inc b/math/libm-test.inc
index e828fc2..1e067fe 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -4616,6 +4616,8 @@ fma_test (void)
TEST_fff_f (fma, 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115);
TEST_fff_f (fma, -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127);
TEST_fff_f (fma, 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127);
+ TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13);
@@ -4635,10 +4637,11 @@ fma_test (void)
TEST_fff_f (fma, 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022, UNDERFLOW_EXCEPTION);
- /* Sometimes the FE_UNDERFLOW is not set, so be prepared. See Bug 14152. */
- TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION_OK);
+ TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983);
+ TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L);
@@ -4646,8 +4649,9 @@ fma_test (void)
TEST_fff_f (fma, 0xc.7fc000003ffffffp-1194L, 0x8.1e0003fffffffffp+15327L, -0x8.fffep+14072L, 0xc.ae9f164020effffp+14136L);
TEST_fff_f (fma, -0x8.0001fc000000003p+1798L, 0xcp-2230L, 0x8.f7e000000000007p-468L, -0xc.0002f9ffee10404p-429L);
TEST_fff_f (fma, 0xc.0000000000007ffp+10130L, -0x8.000000000000001p+4430L, 0xc.07000000001ffffp+14513L, -0xb.fffffffffffd7e4p+14563L);
- /* Bug 14152: underflow exception may be missing. */
- TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION_OK);
+ TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
#endif
#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
TEST_fff_f (fma, 0x1.bb2de33e02ccbbfa6e245a7c1f71p-2584L, -0x1.6b500daf0580d987f1bc0cadfcddp-13777L, 0x1.613cd91d9fed34b33820e5ab9d8dp-16378L, -0x1.3a79fb50eb9ce887cffa0f09bd9fp-16360L);
@@ -4663,6 +4667,8 @@ fma_test (void)
TEST_fff_f (fma, 0x1.00000000000007ffffffffffffffp-9045L, -0x1.ffffffffffff80000001ffffffffp+4773L, -0x1.f8p-4316L, -0x1.00000000000f88000000fffffdffp-4271L);
TEST_fff_f (fma, 0x1.4e922764c90701d4a2f21d01893dp-8683L, -0x1.955a12e2d7c9447c27fa022fc865p+212L, -0x1.e9634462eaef96528b90b6944578p-8521L, -0x1.08e1783184a371943d3598e10865p-8470L);
TEST_fff_f (fma, 0x1.801181509c03bdbef10d6165588cp-15131L, 0x1.ad86f8e57d3d40bfa8007780af63p-368L, -0x1.6e9df0dab1c9f1d7a6043c390741p-15507L, 0x1.417c9b2b15e2ad57dc9e0e920844p-15498L);
+ TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
#endif
END (fma);
@@ -4717,6 +4723,23 @@ fma_test_towardzero (void)
TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
+
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
+ TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+ TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+ TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
+ TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+#endif
}
fesetround (save_round_mode);
@@ -4773,6 +4796,23 @@ fma_test_downward (void)
TEST_fff_f (fma, -min_value, min_value, minus_zero, -min_subnorm_value, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION);
+
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
+ TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00008p-127, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+ TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+ TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
+ TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+#endif
}
fesetround (save_round_mode);
@@ -4829,6 +4869,23 @@ fma_test_upward (void)
TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, plus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
TEST_fff_f (fma, -min_value, -min_value, minus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION);
+
+#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
+ TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00008p-127, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
+ TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64
+ TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+#endif
+#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113
+ TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION);
+ TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION);
+#endif
}
fesetround (save_round_mode);
diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index 5e21461..108fd66 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -210,20 +210,14 @@ __fma (double x, double y, double z)
{
/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa1 & 1 is the round bit and j is our sticky
- bit. In round-to-nearest 001 rounds down like 00,
- 011 rounds up, even though 01 rounds down (thus we need
- to adjust), 101 rounds down like 10 and 111 rounds up
- like 11. */
- if ((v.ieee.mantissa1 & 3) == 1)
- {
- v.d *= 0x1p-106;
- if (v.ieee.negative)
- return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
- else
- return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
- }
- else
- return v.d * 0x1p-106;
+ bit. */
+ w.d = 0.0;
+ w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
+ w.ieee.negative = v.ieee.negative;
+ v.ieee.mantissa1 &= ~3U;
+ v.d *= 0x1p-106;
+ w.d *= 0x1p-2;
+ return v.d + w.d;
}
v.ieee.mantissa1 |= j;
return v.d * 0x1p-106;
diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
index 46b3d81..7fb9856 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z)
{
/* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa3 & 1 is the round bit and j is our sticky
- bit. In round-to-nearest 001 rounds down like 00,
- 011 rounds up, even though 01 rounds down (thus we need
- to adjust), 101 rounds down like 10 and 111 rounds up
- like 11. */
- if ((v.ieee.mantissa3 & 3) == 1)
- {
- v.d *= 0x1p-226L;
- if (v.ieee.negative)
- return v.d - 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
- else
- return v.d + 0x1p-16494L /* __LDBL_DENORM_MIN__ */;
- }
- else
- return v.d * 0x1p-226L;
+ bit. */
+ w.d = 0.0L;
+ w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
+ w.ieee.negative = v.ieee.negative;
+ v.ieee.mantissa3 &= ~3U;
+ v.d *= 0x1p-226L;
+ w.d *= 0x1p-2L;
+ return v.d + w.d;
}
v.ieee.mantissa3 |= j;
return v.d * 0x1p-226L;
diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c
index d125124..7310491 100644
--- a/sysdeps/ieee754/ldbl-96/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmal.c
@@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z)
{
/* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
v.ieee.mantissa1 & 1 is the round bit and j is our sticky
- bit. In round-to-nearest 001 rounds down like 00,
- 011 rounds up, even though 01 rounds down (thus we need
- to adjust), 101 rounds down like 10 and 111 rounds up
- like 11. */
- if ((v.ieee.mantissa1 & 3) == 1)
- {
- v.d *= 0x1p-128L;
- if (v.ieee.negative)
- return v.d - 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
- else
- return v.d + 0x1p-16445L /* __LDBL_DENORM_MIN__ */;
- }
- else
- return v.d * 0x1p-128L;
+ bit. */
+ w.d = 0.0L;
+ w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
+ w.ieee.negative = v.ieee.negative;
+ v.ieee.mantissa1 &= ~3U;
+ v.d *= 0x1p-128L;
+ w.d *= 0x1p-2L;
+ return v.d + w.d;
}
v.ieee.mantissa1 |= j;
return v.d * 0x1p-128L;