diff options
author | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2013-11-25 06:12:30 -0600 |
---|---|---|
committer | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2013-11-25 06:34:41 -0600 |
commit | 41e8926aa4b7f17bc95984737ee82a254ad0911c (patch) | |
tree | f113bbedd0ec991ae1b9dddf3a36077e22d19815 /sysdeps/ieee754 | |
parent | 085b5ddfe301eb74bc9f824ff293f054c1c910b7 (diff) | |
download | glibc-41e8926aa4b7f17bc95984737ee82a254ad0911c.zip glibc-41e8926aa4b7f17bc95984737ee82a254ad0911c.tar.gz glibc-41e8926aa4b7f17bc95984737ee82a254ad0911c.tar.bz2 |
PowerPC: Set/restore rounding mode only when needed
This patch helps some math functions performance by adding the libc_fexxx
variant of inline functions to handle both FPU round and exception set/restore
and by using them on the libc_fexxx_ctx functions. It is based on already coded
fexxx family functions for PPC with fpu.
Here is the summary of performance improvements due this patch (measured on a
POWER7 machine):
Before:
cos(): ITERS:9.5895e+07: TOTAL:5116.03Mcy, MAX:77.6cy, MIN:49.792cy, 18744 calls/Mcy
exp(): ITERS:2.827e+07: TOTAL:5187.15Mcy, MAX:494.018cy, MIN:38.422cy, 5450.01 calls/Mcy
pow(): ITERS:6.1705e+07: TOTAL:5144.26Mcy, MAX:171.95cy, MIN:29.935cy, 11994.9 calls/Mcy
sin(): ITERS:8.6898e+07: TOTAL:5117.06Mcy, MAX:83.841cy, MIN:46.582cy, 16982 calls/Mcy
tan(): ITERS:2.9473e+07: TOTAL:5115.39Mcy, MAX:191.017cy, MIN:172.352cy, 5761.63 calls/Mcy
After:
cos(): ITERS:2.05265e+08: TOTAL:5111.37Mcy, MAX:78.754cy, MIN:24.196cy, 40158.5 calls/Mcy
exp(): ITERS:3.341e+07: TOTAL:5170.84Mcy, MAX:476.317cy, MIN:15.574cy, 6461.23 calls/Mcy
pow(): ITERS:7.6153e+07: TOTAL:5129.1Mcy, MAX:147.5cy, MIN:30.916cy, 14847.2 calls/Mcy
sin(): ITERS:1.58816e+08: TOTAL:5115.11Mcy, MAX:1490.39cy, MIN:22.341cy, 31048.4 calls/Mcy
tan(): ITERS:3.4964e+07: TOTAL:5114.18Mcy, MAX:177.422cy, MIN:146.115cy, 6836.68 calls/Mcy
Diffstat (limited to 'sysdeps/ieee754')
-rw-r--r-- | sysdeps/ieee754/ldbl-128ibm/e_expl.c | 31 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128ibm/s_llrintl.c | 2 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128ibm/s_lrintl.c | 2 | ||||
-rw-r--r-- | sysdeps/ieee754/ldbl-128ibm/s_rintl.c | 2 |
4 files changed, 16 insertions, 21 deletions
diff --git a/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/sysdeps/ieee754/ldbl-128ibm/e_expl.c index f7c50bf..65ef185 100644 --- a/sysdeps/ieee754/ldbl-128ibm/e_expl.c +++ b/sysdeps/ieee754/ldbl-128ibm/e_expl.c @@ -134,18 +134,17 @@ static const long double C[] = { long double __ieee754_expl (long double x) { + long double result, x22; + union ibm_extended_long_double ex2_u, scale_u; + int unsafe; + /* Check for usual case. */ if (isless (x, himark) && isgreater (x, lomark)) { - int tval1, tval2, unsafe, n_i, exponent2; - long double x22, n, result, xl; - union ibm_extended_long_double ex2_u, scale_u; - fenv_t oldenv; - - feholdexcept (&oldenv); -#ifdef FE_TONEAREST - fesetround (FE_TONEAREST); -#endif + int tval1, tval2, n_i, exponent2; + long double n, xl; + + SET_RESTORE_ROUND (FE_TONEAREST); n = __roundl (x*M_1_LN2); x = x-n*M_LN2_0; @@ -201,11 +200,6 @@ __ieee754_expl (long double x) less than 4.8e-39. */ x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6))))); - /* Return result. */ - fesetenv (&oldenv); - - result = x22 * ex2_u.ld + ex2_u.ld; - /* Now we can test whether the result is ultimate or if we are unsure. In the later case we should probably call a mpn based routine to give the ultimate result. @@ -235,10 +229,6 @@ __ieee754_expl (long double x) return __ieee754_expl_proc2 (origx); } */ - if (!unsafe) - return result; - else - return result * scale_u.ld; } /* Exceptional cases: */ else if (isless (x, himark)) @@ -253,5 +243,10 @@ __ieee754_expl (long double x) else /* Return x, if x is a NaN or Inf; or overflow, otherwise. */ return TWO1023*x; + + result = x22 * ex2_u.ld + ex2_u.ld; + if (!unsafe) + return result; + return result * scale_u.ld; } strong_alias (__ieee754_expl, __expl_finite) diff --git a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c index 8560349..3503973 100644 --- a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c +++ b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c @@ -43,7 +43,7 @@ __llrintl (long double x) #endif ) { - save_round = fegetround (); + save_round = __fegetround (); if (__builtin_expect ((xh == -(double) (-__LONG_LONG_MAX__ - 1)), 0)) { diff --git a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c index 588098d..49dbd42 100644 --- a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c +++ b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c @@ -49,7 +49,7 @@ __lrintl (long double x) #endif ) { - save_round = fegetround (); + save_round = __fegetround (); #if __LONG_MAX__ == 2147483647 long long llhi = (long long) xh; diff --git a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c index 48dbe85..5fd6bb8 100644 --- a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c +++ b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c @@ -40,7 +40,7 @@ __rintl (long double x) __builtin_inf ()), 1)) { double orig_xh; - int save_round = fegetround (); + int save_round = __fegetround (); /* Long double arithmetic, including the canonicalisation below, only works in round-to-nearest mode. */ |