diff options
author | Bert Tenjy <bert.tenjy@gmail.com> | 2019-05-07 17:04:11 -0500 |
---|---|---|
committer | Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> | 2020-02-19 17:28:52 -0300 |
commit | 0704d322185cac23a98334f1f5918dee8069d2fe (patch) | |
tree | f4d189e9443b21ba39c51d79d823e1b71ada741b | |
parent | 9c027f11a09f5150ca71031b0919c2687113b461 (diff) | |
download | glibc-0704d322185cac23a98334f1f5918dee8069d2fe.zip glibc-0704d322185cac23a98334f1f5918dee8069d2fe.tar.gz glibc-0704d322185cac23a98334f1f5918dee8069d2fe.tar.bz2 |
PPC64: Add libmvec SIMD single-precision logarithm function [BZ #24208]
Implements a single-precision vector logarithm function. The algorithm is
an adaptation of the one in sysdeps/ieee754/flt-32/e_logf.c, modified for
PPC64 VSX hardware. The version of e_logf.c referenced here is from
commit #bf27d3973d.
The patch has been tested on both little-endian and big-endian PPC64. It
passes all the single-precision logarithm tests run by make check, with a
maximum error of 1 ULP. Integration into the make check infrastructure is
adapted from similar x86_64 changes in commit #774488f88a.
Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | sysdeps/powerpc/bits/math-vector.h | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/fpu/libm-test-ulps | 3 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/Versions | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile | 5 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/multiarch/math_config_flt.h | 175 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf4_vsx.c | 101 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf_data.c | 62 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist | 1 |
10 files changed, 351 insertions, 3 deletions
@@ -263,6 +263,7 @@ Major new features: - double-precision cosine: cos - single-precision cosine: cosf - double-precision logarithm: log + - single-precision logarithm: logf - double-precision sine: sin - single-precision sine: sinf - double-precision sincos: sincos diff --git a/sysdeps/powerpc/bits/math-vector.h b/sysdeps/powerpc/bits/math-vector.h index d5be086..6dc6248 100644 --- a/sysdeps/powerpc/bits/math-vector.h +++ b/sysdeps/powerpc/bits/math-vector.h @@ -38,6 +38,8 @@ # define __DECL_SIMD_cos __DECL_SIMD_PPC64 # undef __DECL_SIMD_log # define __DECL_SIMD_log __DECL_SIMD_PPC64 +# undef __DECL_SIMD_logf +# define __DECL_SIMD_logf __DECL_SIMD_PPC64 # undef __DECL_SIMD_sincos # define __DECL_SIMD_sincos __DECL_SIMD_PPC64 # undef __DECL_SIMD_sincosf diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps index 038f291..35b13ea 100644 --- a/sysdeps/powerpc/fpu/libm-test-ulps +++ b/sysdeps/powerpc/fpu/libm-test-ulps @@ -2466,6 +2466,9 @@ ifloat128: 1 ildouble: 1 ldouble: 1 +Function: "log_vlen4": +float: 1 + Function: "mul_downward_ldouble": double: 1 float: 1 diff --git a/sysdeps/powerpc/powerpc64/fpu/Versions b/sysdeps/powerpc/powerpc64/fpu/Versions index 390d8a6..c083d9f 100644 --- a/sysdeps/powerpc/powerpc64/fpu/Versions +++ b/sysdeps/powerpc/powerpc64/fpu/Versions @@ -1,6 +1,6 @@ libmvec { GLIBC_2.30 { _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin; _ZGVbN4v_sinf; - _ZGVbN2v_log; _ZGVbN2vvv_sincos; _ZGVbN4vvv_sincosf; + _ZGVbN2v_log; _ZGVbN4v_logf; _ZGVbN2vvv_sincos; _ZGVbN4vvv_sincosf; } } diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile index c23a41a..238bfe7 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile @@ -2,11 +2,14 @@ ifeq ($(subdir),mathvec) libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx \ vec_d_sin2_vsx vec_s_sinf4_vsx \ vec_d_log2_vsx vec_d_log_data \ + vec_s_logf4_vsx 
vec_s_logf_data \ vec_d_sincos2_vsx vec_s_sincosf4_vsx CFLAGS-vec_d_cos2_vsx.c += -mabi=altivec -maltivec -mvsx -mpower8-vector CFLAGS-vec_d_log2_vsx.c += -mabi=altivec -maltivec -mvsx -mpower8-vector CFLAGS-vec_d_log_data.c += -mabi=altivec -maltivec -mvsx -mpower8-vector CFLAGS-vec_s_cosf4_vsx.c += -mabi=altivec -maltivec -mvsx +CFLAGS-vec_s_logf4_vsx.c += -mabi=altivec -maltivec -mvsx +CFLAGS-vec_s_logf_data.c += -mabi=altivec -maltivec -mvsx CFLAGS-vec_d_sin2_vsx.c += -mabi=altivec -maltivec -mvsx -mpower8-vector CFLAGS-vec_s_sinf4_vsx.c += -mabi=altivec -maltivec -mvsx CFLAGS-vec_d_sincos2_vsx.c += -mabi=altivec -maltivec -mvsx -mpower8-vector @@ -19,7 +22,7 @@ ifeq ($(build-mathvec),yes) libmvec-tests += double-vlen2 float-vlen4 double-vlen2-funcs = cos sin sincos log -float-vlen4-funcs = cos sin sincos +float-vlen4-funcs = cos sin sincos log double-vlen2-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX float-vlen4-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/math_config_flt.h b/sysdeps/powerpc/powerpc64/fpu/multiarch/math_config_flt.h new file mode 100644 index 0000000..b5fbe02 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/math_config_flt.h @@ -0,0 +1,175 @@ +/* Configuration for math routines. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_CONFIG_H +#define _MATH_CONFIG_H + +#include <math.h> +#include <math_private.h> +#include <nan-high-order-bit.h> +#include <stdint.h> +#include <altivec.h> + +#ifndef WANT_ROUNDING +/* Correct special case results in non-nearest rounding modes. */ +# define WANT_ROUNDING 1 +#endif +#ifndef WANT_ERRNO +/* Set errno according to ISO C with (math_errhandling & MATH_ERRNO) != 0. */ +# define WANT_ERRNO 1 +#endif +#ifndef WANT_ERRNO_UFLOW +/* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */ +# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO) +#endif + +#ifndef TOINT_INTRINSICS +/* When set, the roundtoint and converttoint functions are provided with + the semantics documented below. */ +# define TOINT_INTRINSICS 0 +#endif + +#if TOINT_INTRINSICS +/* Round x to nearest int in all rounding modes, ties have to be rounded + consistently with converttoint so the results match. If the result + would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */ +static inline double_t +roundtoint (double_t x); + +/* Convert x to nearest int in all rounding modes, ties have to be rounded + consistently with roundtoint. If the result is not representable in an + int32_t then the semantics is unspecified. 
*/ +static inline int32_t +converttoint (double_t x); +#endif + +static inline uint32_t +asuint (float f) +{ + union + { + float f; + uint32_t i; + } u = {f}; + return u.i; +} + +static inline float +asfloat (uint32_t i) +{ + union + { + uint32_t i; + float f; + } u = {i}; + return u.f; +} + +static inline uint64_t +asuint64 (double f) +{ + union + { + double f; + uint64_t i; + } u = {f}; + return u.i; +} + +static inline double +asdouble (uint64_t i) +{ + union + { + uint64_t i; + double f; + } u = {i}; + return u.f; +} + +static inline int +issignalingf_inline (float x) +{ + uint32_t ix = asuint (x); + if (HIGH_ORDER_BIT_IS_SET_FOR_SNAN) + return (ix & 0x7fc00000) == 0x7fc00000; + return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000; +} + +#define NOINLINE __attribute__ ((noinline)) + +attribute_hidden float __math_oflowf (uint32_t); +attribute_hidden float __math_uflowf (uint32_t); +attribute_hidden float __math_may_uflowf (uint32_t); +attribute_hidden float __math_divzerof (uint32_t); +attribute_hidden float __math_invalidf (float); + +/* Shared between expf, exp2f and powf. */ +#define EXP2F_TABLE_BITS 5 +#define EXP2F_POLY_ORDER 3 +extern const struct exp2f_data +{ + uint64_t tab[1 << EXP2F_TABLE_BITS]; + double shift_scaled; + double poly[EXP2F_POLY_ORDER]; + double shift; + double invln2_scaled; + double poly_scaled[EXP2F_POLY_ORDER]; +} __exp2f_data attribute_hidden; + +#define LOGF_TABLE_BITS 4 +#define LOGF_POLY_ORDER 4 +extern const struct logf_data +{ + struct + { + float invc, logc; + } tab[1 << LOGF_TABLE_BITS]; + vector float ln2; + vector float poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. 
*/ +} __logf_data attribute_hidden; + +#define LOG2F_TABLE_BITS 4 +#define LOG2F_POLY_ORDER 4 +extern const struct log2f_data +{ + struct + { + double invc, logc; + } tab[1 << LOG2F_TABLE_BITS]; + double poly[LOG2F_POLY_ORDER]; +} __log2f_data attribute_hidden; + +#define POWF_LOG2_TABLE_BITS 4 +#define POWF_LOG2_POLY_ORDER 5 +#if TOINT_INTRINSICS +# define POWF_SCALE_BITS EXP2F_TABLE_BITS +#else +# define POWF_SCALE_BITS 0 +#endif +#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS)) +extern const struct powf_log2_data +{ + struct + { + double invc, logc; + } tab[1 << POWF_LOG2_TABLE_BITS]; + double poly[POWF_LOG2_POLY_ORDER]; +} __powf_log2_data attribute_hidden; + +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c index 87be9de..b4721b1 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c @@ -23,5 +23,5 @@ VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf) VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf) - +VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVbN4v_logf) VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf) diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf4_vsx.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf4_vsx.c new file mode 100644 index 0000000..80e5077 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf4_vsx.c @@ -0,0 +1,101 @@ +/* Single-precision vector log function. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <stdint.h> +#include <shlib-compat.h> +#include <libm-alias-float.h> +#include "math_config_flt.h" + +/* +LOGF_TABLE_BITS = 4 +LOGF_POLY_ORDER = 4. */ + +#define T __logf_data.tab +#define A __logf_data.poly +#define Ln2 __logf_data.ln2 +#define N (1 << LOGF_TABLE_BITS) +#define OFF 0x3f330000 + +vector float +_ZGVbN4v_logf (vector float x) +{ + vector float z, r, r2, y, y0, invc, logc; + vector unsigned int ix, iz, tmp, x_e, x_s, x_n, lsh; + vector signed int k, i; + + ix = (vector unsigned int) x; + x_e = ix & vec_splats ((unsigned int) 0x7f800000); + x_s = ix & vec_splats ((unsigned int) 0x007fffff); + + x_n = (vector unsigned int) (x * vec_splats (0x1p23f)); + lsh = vec_splats ((unsigned int) 23); + lsh = lsh << lsh; + x_n = x_n - lsh; + + for (int m=0; m<4; m++) + { + /* x is subnormal, normalize it. */ + if ((x_e[m] == 1) && (x_s[m] != 0)) + { + ix[m] = x_n[m]; + } + } + /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - vec_splats((unsigned int) OFF); + i = (vector signed int) + (tmp >> vec_splats ((unsigned int) (23 - LOGF_TABLE_BITS))) + % vec_splats ((unsigned int) N); + k = ((vector signed int) tmp) >> vec_splats (23); /* Arithmetic shift. 
*/ + iz = ix - (tmp & vec_splats (0x1ff) << vec_splats (23)); + + for (int m=0; m<4; m++) + { + invc[m] = T[i[m]].invc; + logc[m] = T[i[m]].logc; + } + + z = (vector float) iz; + + /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ + r = z * invc - vec_splats((float) 1.0); + y0 = logc + vec_float (k) * Ln2; + + /* Pipelined polynomial evaluation to approximate log1p(r). */ + r2 = r * r; + y = A[1] * r + A[2]; + y = A[0] * r2 + y; + y = y * r2 + (y0 + r); + + for (int m=0; m<4; m++) + { + /* x < 0x1p-126 or inf or nan. */ + if (__glibc_unlikely (ix[m] - 0x00800000 >= 0x7f800000 - 0x00800000)) + y[m] = logf (x[m]); + +#if WANT_ROUNDING + /* Fix sign of zero with downward rounding when x==1. */ + if (__glibc_unlikely (ix[m] == 0x3f800000)) + y[m] = 0; +#endif + } + + return y; +} diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf_data.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf_data.c new file mode 100644 index 0000000..3321b11 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf_data.c @@ -0,0 +1,62 @@ +/* Data definition for logf. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "math_config_flt.h" + +const struct logf_data __logf_data = { + .tab = { + { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 }, + { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 }, + { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 }, + { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 }, + { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 }, + { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 }, + { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 }, + { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 }, + { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 }, + { 0x1p+0, 0x0p+0 }, + { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 }, + { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 }, + { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 }, + { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 }, + { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 }, + { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }, + }, + .ln2= { + 0x1.62e42fefa39efp-1, + 0x1.62e42fefa39efp-1, + 0x1.62e42fefa39efp-1, + 0x1.62e42fefa39efp-1}, + .poly = { + {-0x1.00ea348b88334p-2, + -0x1.00ea348b88334p-2, + -0x1.00ea348b88334p-2, + -0x1.00ea348b88334p-2 + }, + {0x1.5575b0be00b6ap-2, + 0x1.5575b0be00b6ap-2, + 0x1.5575b0be00b6ap-2, + 0x1.5575b0be00b6ap-2 + }, + {-0x1.ffffef20a4123p-2, + -0x1.ffffef20a4123p-2, + -0x1.ffffef20a4123p-2, + -0x1.ffffef20a4123p-2 + }, + } +}; diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist index fccd40e..bb9f0be 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist @@ -3,5 +3,6 @@ GLIBC_2.30 _ZGVbN2v_log F GLIBC_2.30 _ZGVbN2v_sin F GLIBC_2.30 _ZGVbN2vvv_sincos F GLIBC_2.30 _ZGVbN4v_cosf F +GLIBC_2.30 _ZGVbN4v_logf F GLIBC_2.30 _ZGVbN4v_sinf F GLIBC_2.30 _ZGVbN4vvv_sincosf F |