diff options
author | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2017-09-04 17:53:47 +0100 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2017-09-29 17:17:41 +0100 |
commit | 875c76c7048fe4a8c67c265cdae057cb3ca19f44 (patch) | |
tree | ee66c284e87c2a5b62b7d6e39fcdd0295bd058cc | |
parent | 90c42e40d06ee35c4a2206fa2e00f49a988ee0c1 (diff) | |
download | glibc-875c76c7048fe4a8c67c265cdae057cb3ca19f44.zip glibc-875c76c7048fe4a8c67c265cdae057cb3ca19f44.tar.gz glibc-875c76c7048fe4a8c67c265cdae057cb3ca19f44.tar.bz2 |
New generic log2f
Similar to the new logf: double-precision arithmetic and a small
lookup table are used. The argument reduction step is the same as in
the new logf.
Without the wrapper on aarch64:
log2f reciprocal-throughput: 2.3x faster
log2f latency: 2.1x faster
old worst case error: 1.72 ulp
new worst case error: 0.75 ulp
aarch64 .text size: -252 bytes
aarch64 .rodata size: +244 bytes
* math/Makefile (type-float-routines): Add e_log2f_data.
* sysdeps/ieee754/flt-32/e_log2f.c: New implementation.
* sysdeps/ieee754/flt-32/e_log2f_data.c: New file.
* sysdeps/ieee754/flt-32/math_config.h (__log2f_data): Define.
(LOG2F_TABLE_BITS, LOG2F_POLY_ORDER): Define.
* sysdeps/i386/fpu/e_log2f_data.c: New file.
* sysdeps/ia64/fpu/e_log2f_data.c: New file.
* sysdeps/m68k/m680x0/fpu/e_log2f_data.c: New file.
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | math/Makefile | 3 | ||||
-rw-r--r-- | sysdeps/i386/fpu/e_log2f_data.c | 1 | ||||
-rw-r--r-- | sysdeps/ia64/fpu/e_log2f_data.c | 1 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/e_log2f.c | 148 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/e_log2f_data.c | 44 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/math_config.h | 11 | ||||
-rw-r--r-- | sysdeps/m68k/m680x0/fpu/e_log2f_data.c | 1 |
9 files changed, 147 insertions, 75 deletions
@@ -1,5 +1,16 @@ 2017-09-29 Szabolcs Nagy <szabolcs.nagy@arm.com> + * math/Makefile (type-float-routines): Add e_log2f_data. + * sysdeps/ieee754/flt-32/e_log2f.c: New implementation. + * sysdeps/ieee754/flt-32/e_log2f_data.c: New file. + * sysdeps/ieee754/flt-32/math_config.h (__log2f_data): Define. + (LOG2F_TABLE_BITS, LOG2F_POLY_ORDER): Define. + * sysdeps/i386/fpu/e_log2f_data.c: New file. + * sysdeps/ia64/fpu/e_log2f_data.c: New file. + * sysdeps/m68k/m680x0/fpu/e_log2f_data.c: New file. + +2017-09-29 Szabolcs Nagy <szabolcs.nagy@arm.com> + * math/Makefile (type-float-routines): Add e_logf_data. * sysdeps/ieee754/flt-32/e_logf.c: New implementation. * sysdeps/ieee754/flt-32/e_logf_data.c: New file. @@ -14,7 +14,7 @@ Major new features: * Optimized x86-64 trunc and truncf for processors with SSE4.1. -* Optimized generic expf, exp2f, logf. +* Optimized generic expf, exp2f, logf, log2f. * In order to support faster and safer process termination the malloc API family of functions will no longer print a failure address and stack diff --git a/math/Makefile b/math/Makefile index 919fec1..b4b3101 100644 --- a/math/Makefile +++ b/math/Makefile @@ -115,7 +115,8 @@ type-double-routines := branred doasin dosincos halfulp mpa mpatan2 \ # float support type-float-suffix := f -type-float-routines := k_rem_pio2f math_errf e_exp2f_data e_logf_data +type-float-routines := k_rem_pio2f math_errf e_exp2f_data e_logf_data \ + e_log2f_data # _Float128 support type-float128-suffix := f128 diff --git a/sysdeps/i386/fpu/e_log2f_data.c b/sysdeps/i386/fpu/e_log2f_data.c new file mode 100644 index 0000000..1cc8931 --- /dev/null +++ b/sysdeps/i386/fpu/e_log2f_data.c @@ -0,0 +1 @@ +/* Not needed. */ diff --git a/sysdeps/ia64/fpu/e_log2f_data.c b/sysdeps/ia64/fpu/e_log2f_data.c new file mode 100644 index 0000000..1cc8931 --- /dev/null +++ b/sysdeps/ia64/fpu/e_log2f_data.c @@ -0,0 +1 @@ +/* Not needed. 
*/ diff --git a/sysdeps/ieee754/flt-32/e_log2f.c b/sysdeps/ieee754/flt-32/e_log2f.c index 782d901..6c42f27 100644 --- a/sysdeps/ieee754/flt-32/e_log2f.c +++ b/sysdeps/ieee754/flt-32/e_log2f.c @@ -1,86 +1,88 @@ -/* e_logf.c -- float version of e_log.c. - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - * adapted for log2 by Ulrich Drepper <drepper@cygnus.com> - */ +/* Single-precision log2 function. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ #include <math.h> -#include <math_private.h> -#include <fix-int-fp-convert-zero.h> +#include <stdint.h> +#include "math_config.h" + +/* +LOG2F_TABLE_BITS = 4 +LOG2F_POLY_ORDER = 4 -static const float -ln2 = 0.69314718055994530942, -two25 = 3.355443200e+07, /* 0x4c000000 */ -Lg1 = 6.6666668653e-01, /* 3F2AAAAB */ -Lg2 = 4.0000000596e-01, /* 3ECCCCCD */ -Lg3 = 2.8571429849e-01, /* 3E924925 */ -Lg4 = 2.2222198546e-01, /* 3E638E29 */ -Lg5 = 1.8183572590e-01, /* 3E3A3325 */ -Lg6 = 1.5313838422e-01, /* 3E1CD04F */ -Lg7 = 1.4798198640e-01; /* 3E178897 */ +ULP error: 0.752 (nearest rounding.) +Relative error: 1.9 * 2^-26 (before rounding.) +*/ -static const float zero = 0.0; +#define N (1 << LOG2F_TABLE_BITS) +#define T __log2f_data.tab +#define A __log2f_data.poly +#define OFF 0x3f330000 float -__ieee754_log2f(float x) +__ieee754_log2f (float x) { - float hfsq,f,s,z,R,w,t1,t2,dk; - int32_t k,ix,i,j; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t z, r, r2, p, y, y0, invc, logc; + uint32_t ix, iz, top, tmp; + int k, i; + + ix = asuint (x); +#if WANT_ROUNDING + /* Fix sign of zero with downward rounding when x==1. */ + if (__glibc_unlikely (ix == 0x3f800000)) + return 0; +#endif + if (__glibc_unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) + { + /* x < 0x1p-126 or inf or nan. */ + if (ix * 2 == 0) + return __math_divzerof (1); + if (ix == 0x7f800000) /* log2(inf) == inf. */ + return x; + if ((ix & 0x80000000) || ix * 2 >= 0xff000000) + return __math_invalidf (x); + /* x is subnormal, normalize it. */ + ix = asuint (x * 0x1p23f); + ix -= 23 << 23; + } + + /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. 
*/ + tmp = ix - OFF; + i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N; + top = tmp & 0xff800000; + iz = ix - top; + k = (int32_t) tmp >> 23; /* arithmetic shift */ + invc = T[i].invc; + logc = T[i].logc; + z = (double_t) asfloat (iz); - GET_FLOAT_WORD(ix,x); + /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ + r = z * invc - 1; + y0 = logc + (double_t) k; - k=0; - if (ix < 0x00800000) { /* x < 2**-126 */ - if (__builtin_expect((ix&0x7fffffff)==0, 0)) - return -two25/__fabsf (x); /* log(+-0)=-inf */ - if (__builtin_expect(ix<0, 0)) - return (x-x)/(x-x); /* log(-#) = NaN */ - k -= 25; x *= two25; /* subnormal number, scale up x */ - GET_FLOAT_WORD(ix,x); - } - if (__builtin_expect(ix >= 0x7f800000, 0)) return x+x; - k += (ix>>23)-127; - ix &= 0x007fffff; - i = (ix+(0x95f64<<3))&0x800000; - SET_FLOAT_WORD(x,ix|(i^0x3f800000)); /* normalize x or x/2 */ - k += (i>>23); - dk = (float)k; - f = x-(float)1.0; - if((0x007fffff&(15+ix))<16) { /* |f| < 2**-20 */ - if(f==zero) - { - if (FIX_INT_FP_CONVERT_ZERO && dk == 0.0f) - dk = 0.0f; - return dk; - } - R = f*f*((float)0.5-(float)0.33333333333333333*f); - return dk-(R-f)/ln2; - } - s = f/((float)2.0+f); - z = s*s; - i = ix-(0x6147a<<3); - w = z*z; - j = (0x6b851<<3)-ix; - t1= w*(Lg2+w*(Lg4+w*Lg6)); - t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); - i |= j; - R = t2+t1; - if(i>0) { - hfsq=(float)0.5*f*f; - return dk-((hfsq-(s*(hfsq+R)))-f)/ln2; - } else { - return dk-((s*(f-R))-f)/ln2; - } + /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ + r2 = r * r; + y = A[1] * r + A[2]; + y = A[0] * r2 + y; + p = A[3] * r + y0; + y = y * r2 + p; + return (float) y; } strong_alias (__ieee754_log2f, __log2f_finite) diff --git a/sysdeps/ieee754/flt-32/e_log2f_data.c b/sysdeps/ieee754/flt-32/e_log2f_data.c new file mode 100644 index 0000000..e39de3b --- /dev/null +++ b/sysdeps/ieee754/flt-32/e_log2f_data.c @@ -0,0 +1,44 @@ +/* Data definition for log2f. + Copyright (C) 2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "math_config.h" + +const struct log2f_data __log2f_data = { + .tab = { + { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 }, + { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 }, + { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 }, + { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 }, + { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 }, + { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 }, + { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 }, + { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 }, + { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 }, + { 0x1p+0, 0x0p+0 }, + { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 }, + { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 }, + { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 }, + { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 }, + { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 }, + { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }, + }, + .poly = { + -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1, + 0x1.715475f35c8b8p0, + } +}; diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h index 953a4bc..f869fbc 100644 --- a/sysdeps/ieee754/flt-32/math_config.h +++ b/sysdeps/ieee754/flt-32/math_config.h @@ -123,4 +123,15 @@ extern const struct logf_data double poly[LOGF_POLY_ORDER - 1]; /* First 
order coefficient is 1. */ } __logf_data attribute_hidden; +#define LOG2F_TABLE_BITS 4 +#define LOG2F_POLY_ORDER 4 +extern const struct log2f_data +{ + struct + { + double invc, logc; + } tab[1 << LOG2F_TABLE_BITS]; + double poly[LOG2F_POLY_ORDER]; +} __log2f_data attribute_hidden; + #endif diff --git a/sysdeps/m68k/m680x0/fpu/e_log2f_data.c b/sysdeps/m68k/m680x0/fpu/e_log2f_data.c new file mode 100644 index 0000000..1cc8931 --- /dev/null +++ b/sysdeps/m68k/m680x0/fpu/e_log2f_data.c @@ -0,0 +1 @@ +/* Not needed. */ |