diff options
author | Joseph Myers <joseph@codesourcery.com> | 2012-11-03 19:48:53 +0000 |
---|---|---|
committer | Joseph Myers <joseph@codesourcery.com> | 2012-11-03 19:48:53 +0000 |
commit | 5b5b04d6282df0364424c6f2c0462e5c1a4394b0 (patch) | |
tree | bef8cb91fffbf78e56f18479234abb47ce3054b2 /sysdeps | |
parent | fbeafedeea37e0af1984a6511018d159f5ceed6a (diff) | |
download | glibc-5b5b04d6282df0364424c6f2c0462e5c1a4394b0.zip glibc-5b5b04d6282df0364424c6f2c0462e5c1a4394b0.tar.gz glibc-5b5b04d6282df0364424c6f2c0462e5c1a4394b0.tar.bz2 |
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796).
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/generic/math_private.h | 16 | ||||
-rw-r--r-- | sysdeps/i386/fpu/fclrexcpt.c | 3 | ||||
-rw-r--r-- | sysdeps/i386/fpu/fenv_private.h | 23 | ||||
-rw-r--r-- | sysdeps/ieee754/dbl-64/s_fma.c | 18 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128/s_fmal.c | 18 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/s_fma.c | 18 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-96/s_fmal.c | 18 | ||||
-rw-r--r-- | sysdeps/powerpc/fpu/fclrexcpt.c | 3 | ||||
-rw-r--r-- | sysdeps/s390/fpu/fclrexcpt.c | 3 | ||||
-rw-r--r-- | sysdeps/sh/sh4/fpu/fclrexcpt.c | 1 | ||||
-rw-r--r-- | sysdeps/sparc/fpu/fclrexcpt.c | 3 | ||||
-rw-r--r-- | sysdeps/sparc/fpu/fenv_private.h | 12 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/fclrexcpt.c | 3 |
13 files changed, 126 insertions, 13 deletions
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index b375bc0..7661788 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -402,6 +402,22 @@ default_libc_feholdexcept (fenv_t *e) #endif static __always_inline void +default_libc_fesetround (int r) +{ + (void) fesetround (r); +} + +#ifndef libc_fesetround +# define libc_fesetround default_libc_fesetround +#endif +#ifndef libc_fesetroundf +# define libc_fesetroundf default_libc_fesetround +#endif +#ifndef libc_fesetroundl +# define libc_fesetroundl default_libc_fesetround +#endif + +static __always_inline void default_libc_feholdexcept_setround (fenv_t *e, int r) { feholdexcept (e); diff --git a/sysdeps/i386/fpu/fclrexcpt.c b/sysdeps/i386/fpu/fclrexcpt.c index f24d07f..f28ef6b 100644 --- a/sysdeps/i386/fpu/fclrexcpt.c +++ b/sysdeps/i386/fpu/fclrexcpt.c @@ -1,5 +1,5 @@ /* Clear given exceptions in current floating-point environment. - Copyright (C) 1997,99,2000, 2001, 2003, 2004 Free Software Foundation, Inc. + Copyright (C) 1997-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -65,4 +65,5 @@ strong_alias (__feclearexcept, __old_feclearexcept) compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); #endif +libm_hidden_ver (__feclearexcept, feclearexcept) versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/sysdeps/i386/fpu/fenv_private.h b/sysdeps/i386/fpu/fenv_private.h index f33f57c..03f4c97 100644 --- a/sysdeps/i386/fpu/fenv_private.h +++ b/sysdeps/i386/fpu/fenv_private.h @@ -77,6 +77,24 @@ libc_feholdexcept_387 (fenv_t *e) } static __always_inline void +libc_fesetround_sse (int r) +{ + unsigned int mxcsr; + asm (STMXCSR " %0" : "=m" (*&mxcsr)); + mxcsr = (mxcsr & ~0x6000) | (r << 3); + asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); +} + +static __always_inline void +libc_fesetround_387 (int r) +{ + fpu_control_t cw; + _FPU_GETCW (cw); + cw = (cw & ~0xc00) | r; + _FPU_SETCW (cw); +} + +static __always_inline void libc_feholdexcept_setround_sse (fenv_t *e, int r) { unsigned int mxcsr; @@ -247,6 +265,7 @@ libc_feresetround_387 (fenv_t *e) #ifdef __SSE_MATH__ # define libc_feholdexceptf libc_feholdexcept_sse +# define libc_fesetroundf libc_fesetround_sse # define libc_feholdexcept_setroundf libc_feholdexcept_setround_sse # define libc_fetestexceptf libc_fetestexcept_sse # define libc_fesetenvf libc_fesetenv_sse @@ -256,6 +275,7 @@ libc_feresetround_387 (fenv_t *e) # define libc_feresetroundf libc_feresetround_sse #else # define libc_feholdexceptf libc_feholdexcept_387 +# define libc_fesetroundf libc_fesetround_387 # define libc_feholdexcept_setroundf libc_feholdexcept_setround_387 # define libc_fetestexceptf libc_fetestexcept_387 # define libc_fesetenvf libc_fesetenv_387 @@ -267,6 +287,7 @@ libc_feresetround_387 (fenv_t *e) #ifdef __SSE2_MATH__ # define libc_feholdexcept libc_feholdexcept_sse +# define libc_fesetround libc_fesetround_sse # define libc_feholdexcept_setround libc_feholdexcept_setround_sse # define libc_fetestexcept libc_fetestexcept_sse # define libc_fesetenv libc_fesetenv_sse @@ -276,6 +297,7 @@ libc_feresetround_387 (fenv_t *e) # define libc_feresetround libc_feresetround_sse #else # define libc_feholdexcept libc_feholdexcept_387 +# define libc_fesetround libc_fesetround_387 # define libc_feholdexcept_setround libc_feholdexcept_setround_387 # define libc_fetestexcept libc_fetestexcept_387 # define libc_fesetenv libc_fesetenv_387 @@ -286,6 +308,7 @@ libc_feresetround_387 (fenv_t *e) #endif /* __SSE2_MATH__ */ #define libc_feholdexceptl libc_feholdexcept_387 +#define libc_fesetroundl libc_fesetround_387 #define libc_feholdexcept_setroundl libc_feholdexcept_setround_387 #define libc_fetestexceptl libc_fetestexcept_387 #define libc_fesetenvl libc_fesetenv_387 diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index 68c63a1..07fe715 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -167,6 +167,9 @@ __fma (double x, double y, double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + libc_feholdexcept_setround (&env, FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1) double x1 = x * C; @@ -185,9 +188,20 @@ __fma (double x, double y, double z) t1 = m1 - t1; t2 = z - t2; double a2 = t1 + t2; + feclearexcept (FE_INEXACT); - fenv_t env; - libc_feholdexcept_setround (&env, FE_TOWARDZERO); + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + libc_feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } + + libc_fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c index c6a3d71..d576403 100644 --- a/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -170,6 +170,10 @@ __fmal (long double x, long double y, long double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; @@ -188,9 +192,19 @@ __fmal (long double x, long double y, long double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; diff --git a/sysdeps/ieee754/ldbl-96/s_fma.c b/sysdeps/ieee754/ldbl-96/s_fma.c index 001d806..bf2d794 100644 --- a/sysdeps/ieee754/ldbl-96/s_fma.c +++ b/sysdeps/ieee754/ldbl-96/s_fma.c @@ -42,6 +42,10 @@ __fma (double x, double y, double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = (long double) x * C; @@ -60,9 +64,19 @@ __fma (double x, double y, double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ a2 = a2 + m2; diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c index b5fdcba..32e71a1 100644 --- a/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -168,6 +168,10 @@ __fmal (long double x, long double y, long double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; @@ -186,9 +190,19 @@ __fmal (long double x, long double y, long double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; diff --git a/sysdeps/powerpc/fpu/fclrexcpt.c b/sysdeps/powerpc/fpu/fclrexcpt.c index 87376bf..e99cc58 100644 --- a/sysdeps/powerpc/fpu/fclrexcpt.c +++ b/sysdeps/powerpc/fpu/fclrexcpt.c @@ -1,5 +1,5 @@ /* Clear given exceptions in current floating-point environment. - Copyright (C) 1997,99,2000,01 Free Software Foundation, Inc. + Copyright (C) 1997-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -44,4 +44,5 @@ strong_alias (__feclearexcept, __old_feclearexcept) compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); #endif +libm_hidden_ver (__feclearexcept, feclearexcept) versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/sysdeps/s390/fpu/fclrexcpt.c b/sysdeps/s390/fpu/fclrexcpt.c index 2352d74..3e8d9bb 100644 --- a/sysdeps/s390/fpu/fclrexcpt.c +++ b/sysdeps/s390/fpu/fclrexcpt.c @@ -1,5 +1,5 @@ /* Clear given exceptions in current floating-point environment. - Copyright (C) 2000 Free Software Foundation, Inc. + Copyright (C) 2000-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -37,3 +37,4 @@ feclearexcept (int excepts) /* Success. */ return 0; } +libm_hidden_def (feclearexcept) diff --git a/sysdeps/sh/sh4/fpu/fclrexcpt.c b/sysdeps/sh/sh4/fpu/fclrexcpt.c index b4b2ead..2d31fa6 100644 --- a/sysdeps/sh/sh4/fpu/fclrexcpt.c +++ b/sysdeps/sh/sh4/fpu/fclrexcpt.c @@ -39,3 +39,4 @@ feclearexcept (int excepts) return 0; } +libm_hidden_def (feclearexcept) diff --git a/sysdeps/sparc/fpu/fclrexcpt.c b/sysdeps/sparc/fpu/fclrexcpt.c index 82b6ac4..50ec4f4 100644 --- a/sysdeps/sparc/fpu/fclrexcpt.c +++ b/sysdeps/sparc/fpu/fclrexcpt.c @@ -1,5 +1,5 @@ /* Clear given exceptions in current floating-point environment. - Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -39,4 +39,5 @@ strong_alias (__feclearexcept, __old_feclearexcept) compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); #endif +libm_hidden_ver (__feclearexcept, feclearexcept) versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/sysdeps/sparc/fpu/fenv_private.h b/sysdeps/sparc/fpu/fenv_private.h index a6e8e95..8690879 100644 --- a/sysdeps/sparc/fpu/fenv_private.h +++ b/sysdeps/sparc/fpu/fenv_private.h @@ -14,6 +14,15 @@ libc_feholdexcept (fenv_t *e) } static __always_inline void +libc_fesetround (int r) +{ + fenv_t etmp; + __fenv_stfsr(etmp); + etmp = (etmp & ~__FE_ROUND_MASK) | (r); + __fenv_ldfsr(etmp); +} + +static __always_inline void libc_feholdexcept_setround (fenv_t *e, int r) { fenv_t etmp; @@ -79,6 +88,7 @@ libc_feresetround (fenv_t *e) } #define libc_feholdexceptf libc_feholdexcept +#define libc_fesetroundf libc_fesetround #define libc_feholdexcept_setroundf libc_feholdexcept_setround #define libc_fetestexceptf libc_fetestexcept #define libc_fesetenvf libc_fesetenv @@ -87,6 +97,7 @@ libc_feresetround (fenv_t *e) #define libc_feholdsetroundf libc_feholdsetround #define libc_feresetroundf libc_feresetround #define libc_feholdexcept libc_feholdexcept +#define libc_fesetround libc_fesetround #define libc_feholdexcept_setround libc_feholdexcept_setround #define libc_fetestexcept libc_fetestexcept #define libc_fesetenv libc_fesetenv @@ -95,6 +106,7 @@ libc_feresetround (fenv_t *e) #define libc_feholdsetround libc_feholdsetround #define libc_feresetround libc_feresetround #define libc_feholdexceptl libc_feholdexcept +#define libc_fesetroundl libc_fesetround #define libc_feholdexcept_setroundl libc_feholdexcept_setround #define libc_fetestexceptl libc_fetestexcept #define libc_fesetenvl libc_fesetenv diff --git a/sysdeps/x86_64/fpu/fclrexcpt.c b/sysdeps/x86_64/fpu/fclrexcpt.c index 5ef3162..2d40313 100644 --- a/sysdeps/x86_64/fpu/fclrexcpt.c +++ b/sysdeps/x86_64/fpu/fclrexcpt.c @@ -1,5 +1,5 @@ /* Clear given exceptions in current floating-point environment. - Copyright (C) 2001 Free Software Foundation, Inc. + Copyright (C) 2001-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -49,3 +49,4 @@ feclearexcept (int excepts) /* Success. */ return 0; } +libm_hidden_def (feclearexcept) |