PPC64: Add libmvec SIMD single-precision logarithm function [BZ #24208]

Implements single-precision vector logarithm function. The algorithm is an adaptation of the one in sysdeps/ieee754/flt-32/e_logf.c, modified for PPC64 VSX hardware. The version of e_logf.c referenced here is from commit #bf27d3973d. The patch has been tested on both Little-Endian and Big-Endian. It passes all the tests for single-precision logarithm run by make check with max ULP of 1. Integration into the make check infrastructure is adapted from similar x86_64 changes in commit #774488f88a. Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
author: Bert Tenjy <bert.tenjy@gmail.com> 2019-05-07 17:04:11 -0500
committer: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> 2020-02-19 17:28:52 -0300
commit: 0704d322185cac23a98334f1f5918dee8069d2fe (patch)
tree: f4d189e9443b21ba39c51d79d823e1b71ada741b
parent: 9c027f11a09f5150ca71031b0919c2687113b461 (diff)
download: glibc-0704d322185cac23a98334f1f5918dee8069d2fe.zip
glibc-0704d322185cac23a98334f1f5918dee8069d2fe.tar.gz
glibc-0704d322185cac23a98334f1f5918dee8069d2fe.tar.bz2
10 files changed, 351 insertions, 3 deletions
diff --git a/NEWS b/NEWS
index c17cc26..0b87390 100644
--- a/NEWS
+++ b/NEWS
@@ -263,6 +263,7 @@ Major new features:
   - double-precision cosine: cos
   - single-precision cosine: cosf
   - double-precision logarithm: log
+  - single-precision logarithm: logf
   - double-precision sine: sin
   - single-precision sine: sinf
   - double-precision sincos: sincos
diff --git a/sysdeps/powerpc/bits/math-vector.h b/sysdeps/powerpc/bits/math-vector.h
index d5be086..6dc6248 100644
--- a/sysdeps/powerpc/bits/math-vector.h
+++ b/sysdeps/powerpc/bits/math-vector.h
@@ -38,6 +38,8 @@
 #  define __DECL_SIMD_cos __DECL_SIMD_PPC64
 #  undef __DECL_SIMD_log
 #  define __DECL_SIMD_log __DECL_SIMD_PPC64
+#  undef __DECL_SIMD_logf
+#  define __DECL_SIMD_logf __DECL_SIMD_PPC64
 #  undef __DECL_SIMD_sincos
 #  define __DECL_SIMD_sincos __DECL_SIMD_PPC64
 #  undef __DECL_SIMD_sincosf
diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
index 038f291..35b13ea 100644
--- a/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/sysdeps/powerpc/fpu/libm-test-ulps
@@ -2466,6 +2466,9 @@ ifloat128: 1
 ildouble: 1
 ldouble: 1
 
+Function: "log_vlen4":
+float: 1
+
 Function: "mul_downward_ldouble":
 double: 1
 float: 1
diff --git a/sysdeps/powerpc/powerpc64/fpu/Versions b/sysdeps/powerpc/powerpc64/fpu/Versions
index 390d8a6..c083d9f 100644
--- a/sysdeps/powerpc/powerpc64/fpu/Versions
+++ b/sysdeps/powerpc/powerpc64/fpu/Versions
@@ -1,6 +1,6 @@
 libmvec {
   GLIBC_2.30 {
     _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin; _ZGVbN4v_sinf;
-    _ZGVbN2v_log; _ZGVbN2vvv_sincos; _ZGVbN4vvv_sincosf;
+    _ZGVbN2v_log; _ZGVbN4v_logf; _ZGVbN2vvv_sincos; _ZGVbN4vvv_sincosf;
   }
 }
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
index c23a41a..238bfe7 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
@@ -2,11 +2,14 @@ ifeq ($(subdir),mathvec)
 libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx \
 			   vec_d_sin2_vsx vec_s_sinf4_vsx \
 			   vec_d_log2_vsx vec_d_log_data \
+			   vec_s_logf4_vsx vec_s_logf_data \
 			   vec_d_sincos2_vsx vec_s_sincosf4_vsx
 CFLAGS-vec_d_cos2_vsx.c += -mabi=altivec -maltivec -mvsx -mpower8-vector
 CFLAGS-vec_d_log2_vsx.c += -mabi=altivec -maltivec -mvsx -mpower8-vector
 CFLAGS-vec_d_log_data.c += -mabi=altivec -maltivec -mvsx -mpower8-vector
 CFLAGS-vec_s_cosf4_vsx.c += -mabi=altivec -maltivec -mvsx
+CFLAGS-vec_s_logf4_vsx.c += -mabi=altivec -maltivec -mvsx
+CFLAGS-vec_s_logf_data.c += -mabi=altivec -maltivec -mvsx
 CFLAGS-vec_d_sin2_vsx.c += -mabi=altivec -maltivec -mvsx -mpower8-vector
 CFLAGS-vec_s_sinf4_vsx.c += -mabi=altivec -maltivec -mvsx
 CFLAGS-vec_d_sincos2_vsx.c += -mabi=altivec -maltivec -mvsx -mpower8-vector
@@ -19,7 +22,7 @@ ifeq ($(build-mathvec),yes)
 libmvec-tests += double-vlen2 float-vlen4
 
 double-vlen2-funcs = cos sin sincos log
-float-vlen4-funcs = cos sin sincos
+float-vlen4-funcs = cos sin sincos log
 
 double-vlen2-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX
 float-vlen4-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/math_config_flt.h b/sysdeps/powerpc/powerpc64/fpu/multiarch/math_config_flt.h
new file mode 100644
index 0000000..b5fbe02
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/math_config_flt.h
@@ -0,0 +1,175 @@
+/* Configuration for math routines.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _MATH_CONFIG_H
+#define _MATH_CONFIG_H
+
+#include <math.h>
+#include <math_private.h>
+#include <nan-high-order-bit.h>
+#include <stdint.h>
+#include <altivec.h>
+
+#ifndef WANT_ROUNDING
+/* Correct special case results in non-nearest rounding modes.  */
+# define WANT_ROUNDING 1
+#endif
+#ifndef WANT_ERRNO
+/* Set errno according to ISO C with (math_errhandling & MATH_ERRNO) != 0.  */
+# define WANT_ERRNO 1
+#endif
+#ifndef WANT_ERRNO_UFLOW
+/* Set errno to ERANGE if result underflows to 0 (in all rounding modes).  */
+# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
+#endif
+
+#ifndef TOINT_INTRINSICS
+/* When set, the roundtoint and converttoint functions are provided with
+   the semantics documented below.  */
+# define TOINT_INTRINSICS 0
+#endif
+
+#if TOINT_INTRINSICS
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+   consistently with converttoint so the results match.  If the result
+   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
+static inline double_t
+roundtoint (double_t x);
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+   consistently with roundtoint.  If the result is not representable in an
+   int32_t then the semantics is unspecified.  */
+static inline int32_t
+converttoint (double_t x);
+#endif
+
+static inline uint32_t
+asuint (float f) /* Return the stored bits of F as a uint32_t.  */
+{
+  union
+  {
+    float f;
+    uint32_t i;
+  } u = {f}; /* Initializes the first member, the float.  */
+  return u.i; /* Read the same storage back through the integer member.  */
+}
+
+static inline float
+asfloat (uint32_t i) /* Return the float whose stored bits are I.  */
+{
+  union
+  {
+    uint32_t i;
+    float f;
+  } u = {i}; /* Initializes the first member, the integer.  */
+  return u.f; /* Read the same storage back through the float member.  */
+}
+
+static inline uint64_t
+asuint64 (double f) /* Return the stored bits of F as a uint64_t.  */
+{
+  union
+  {
+    double f;
+    uint64_t i;
+  } u = {f}; /* Initializes the first member, the double.  */
+  return u.i; /* Read the same storage back through the integer member.  */
+}
+
+static inline double
+asdouble (uint64_t i) /* Return the double whose stored bits are I.  */
+{
+  union
+  {
+    uint64_t i;
+    double f;
+  } u = {i}; /* Initializes the first member, the integer.  */
+  return u.f; /* Read the same storage back through the double member.  */
+}
+
+static inline int
+issignalingf_inline (float x) /* Nonzero if X's bits pass the sNaN test.  */
+{
+  uint32_t ix = asuint (x); /* Work on the raw bit pattern.  */
+  if (HIGH_ORDER_BIT_IS_SET_FOR_SNAN)
+    return (ix & 0x7fc00000) == 0x7fc00000; /* Exponent all ones, bit 22 set.  */
+  return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000; /* Doubling drops the sign; true for a NaN with bit 22 clear.  */
+}
+
+#define NOINLINE __attribute__ ((noinline))
+
+attribute_hidden float __math_oflowf (uint32_t);
+attribute_hidden float __math_uflowf (uint32_t);
+attribute_hidden float __math_may_uflowf (uint32_t);
+attribute_hidden float __math_divzerof (uint32_t);
+attribute_hidden float __math_invalidf (float);
+
+/* Shared between expf, exp2f and powf.  */
+#define EXP2F_TABLE_BITS 5
+#define EXP2F_POLY_ORDER 3
+extern const struct exp2f_data
+{
+  uint64_t tab[1 << EXP2F_TABLE_BITS]; /* 32 entries with EXP2F_TABLE_BITS == 5.  */
+  double shift_scaled;
+  double poly[EXP2F_POLY_ORDER];
+  double shift;
+  double invln2_scaled;
+  double poly_scaled[EXP2F_POLY_ORDER];
+} __exp2f_data attribute_hidden; /* NOTE(review): not referenced in the visible hunks of this patch -- confirm it is needed here.  */
+
+#define LOGF_TABLE_BITS 4
+#define LOGF_POLY_ORDER 4
+extern const struct logf_data
+{
+  struct
+  {
+    float invc, logc; /* _ZGVbN4v_logf uses r = z*invc - 1 and y0 = logc + k*ln2.  */
+  } tab[1 << LOGF_TABLE_BITS]; /* 16 entries with LOGF_TABLE_BITS == 4.  */
+  vector float ln2; /* Defined with the same value in every lane.  */
+  vector float poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
+} __logf_data attribute_hidden;
+
+#define LOG2F_TABLE_BITS 4
+#define LOG2F_POLY_ORDER 4
+extern const struct log2f_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << LOG2F_TABLE_BITS]; /* 16 entries with LOG2F_TABLE_BITS == 4.  */
+  double poly[LOG2F_POLY_ORDER];
+} __log2f_data attribute_hidden; /* NOTE(review): not referenced in the visible hunks of this patch -- confirm it is needed here.  */
+
+#define POWF_LOG2_TABLE_BITS 4
+#define POWF_LOG2_POLY_ORDER 5
+#if TOINT_INTRINSICS
+# define POWF_SCALE_BITS EXP2F_TABLE_BITS
+#else
+# define POWF_SCALE_BITS 0
+#endif
+#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
+extern const struct powf_log2_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << POWF_LOG2_TABLE_BITS]; /* 16 entries with POWF_LOG2_TABLE_BITS == 4.  */
+  double poly[POWF_LOG2_POLY_ORDER];
+} __powf_log2_data attribute_hidden; /* NOTE(review): not referenced in the visible hunks of this patch -- confirm it is needed here.  */
+
+#endif
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c
index 87be9de..b4721b1 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c
@@ -23,5 +23,5 @@
 
 VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf)
 VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf)
-
+VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVbN4v_logf)
 VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf4_vsx.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf4_vsx.c
new file mode 100644
index 0000000..80e5077
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf4_vsx.c
@@ -0,0 +1,101 @@
+/* Single-precision vector log function.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include <stdint.h>
+#include <shlib-compat.h>
+#include <libm-alias-float.h>
+#include "math_config_flt.h"
+
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4.  */
+
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
+
+vector float
+_ZGVbN4v_logf (vector float x) /* Return log of each of the four lanes of X.  */
+{
+  vector float z, r, r2, y, y0, invc, logc;
+  vector unsigned int ix, iz, tmp, x_e, x_s, x_n, lsh;
+  vector signed int k, i;
+
+  ix = (vector unsigned int) x; /* Raw bit patterns of the inputs.  */
+  x_e = ix & vec_splats ((unsigned int) 0x7f800000); /* Exponent field, not shifted down.  */
+  x_s = ix & vec_splats ((unsigned int) 0x007fffff); /* Mantissa field.  */
+
+  x_n = (vector unsigned int) (x * vec_splats (0x1p23f)); /* Bits of x * 2^23...  */
+  lsh = vec_splats ((unsigned int) 23);
+  lsh = lsh << lsh; /* Every lane is now 23 << 23.  */
+  x_n = x_n - lsh; /* ...with 23 taken back out of the exponent field.  */
+  /* NOTE(review): the branch in the loop below looks unreachable.  */
+  for (int m=0; m<4; m++)
+  {
+    /* x is subnormal, normalize it.  */
+    if ((x_e[m] == 1) && (x_s[m] != 0)) /* x_e has only bits 23-30 set, so it is never 1.  */
+    {
+      ix[m] = x_n[m]; /* Not reached as written -- confirm intent.  */
+    } /* Caution: testing "== 0" instead would also match negative subnormals.  */
+  } /* As written, subnormal lanes keep their raw bits and take the logf fallback below.  */
+  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - vec_splats((unsigned int) OFF);
+  i = (vector signed int)
+      (tmp >> vec_splats ((unsigned int) (23 - LOGF_TABLE_BITS)))
+      % vec_splats ((unsigned int) N); /* Table index: bits 19-22 of tmp.  */
+  k = ((vector signed int) tmp) >> vec_splats (23); /* Arithmetic shift.  */
+  iz = ix - (tmp & vec_splats (0x1ff) << vec_splats (23)); /* << binds tighter than &: subtracts tmp's top nine bits (k << 23).  */
+
+  for (int m=0; m<4; m++)
+  {
+    invc[m] = T[i[m]].invc; /* Per-lane scalar table lookups.  */
+    logc[m] = T[i[m]].logc;
+  }
+
+  z = (vector float) iz; /* Reinterpret the adjusted bits as floats.  */
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+  r = z * invc - vec_splats((float) 1.0);
+  y0 = logc + vec_float (k) * Ln2;
+
+  /* Pipelined polynomial evaluation to approximate log1p(r).  */
+  r2 = r * r;
+  y = A[1] * r + A[2];
+  y = A[0] * r2 + y;
+  y = y * r2 + (y0 + r); /* y = (A[0]*r^2 + A[1]*r + A[2])*r^2 + r + y0.  */
+
+  for (int m=0; m<4; m++)
+  {
+    /* x < 0x1p-126 (zero, negative and subnormal included) or inf or nan: recompute this lane with scalar logf.  */
+    if (__glibc_unlikely (ix[m] - 0x00800000 >= 0x7f800000 - 0x00800000))
+      y[m] = logf (x[m]);
+
+#if WANT_ROUNDING
+    /* Fix sign of zero with downward rounding when x==1.  */
+    if (__glibc_unlikely (ix[m] == 0x3f800000))
+      y[m] = 0;
+#endif
+  }
+
+  return y;
+}
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf_data.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf_data.c
new file mode 100644
index 0000000..3321b11
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_logf_data.c
@@ -0,0 +1,62 @@
+/* Data definition for logf.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "math_config_flt.h"
+
+const struct logf_data __logf_data = {
+  .tab = { /* {invc, logc} pairs; the fields are float, so these literals are converted to float.  */
+  { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
+  { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
+  { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
+  { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
+  { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
+  { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
+  { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
+  { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
+  { 0x1p+0, 0x0p+0 },
+  { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
+  { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
+  { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
+  { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
+  { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
+  },
+  .ln2= { /* One value replicated in all four lanes.  */
+      0x1.62e42fefa39efp-1,
+      0x1.62e42fefa39efp-1,
+      0x1.62e42fefa39efp-1,
+      0x1.62e42fefa39efp-1},
+  .poly = { /* poly[0], poly[1], poly[2]; each coefficient replicated in all four lanes.  */
+      {-0x1.00ea348b88334p-2,
+      -0x1.00ea348b88334p-2,
+      -0x1.00ea348b88334p-2,
+      -0x1.00ea348b88334p-2
+      },
+      {0x1.5575b0be00b6ap-2,
+      0x1.5575b0be00b6ap-2,
+      0x1.5575b0be00b6ap-2,
+      0x1.5575b0be00b6ap-2
+      },
+      {-0x1.ffffef20a4123p-2,
+      -0x1.ffffef20a4123p-2,
+      -0x1.ffffef20a4123p-2,
+      -0x1.ffffef20a4123p-2
+      },
+  }
+};
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
index fccd40e..bb9f0be 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
@@ -3,5 +3,6 @@ GLIBC_2.30 _ZGVbN2v_log F
 GLIBC_2.30 _ZGVbN2v_sin F
 GLIBC_2.30 _ZGVbN2vvv_sincos F
 GLIBC_2.30 _ZGVbN4v_cosf F
+GLIBC_2.30 _ZGVbN4v_logf F
 GLIBC_2.30 _ZGVbN4v_sinf F
 GLIBC_2.30 _ZGVbN4vvv_sincosf F
author	Bert Tenjy <bert.tenjy@gmail.com>	2019-05-07 17:04:11 -0500
committer	Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>	2020-02-19 17:28:52 -0300
commit	0704d322185cac23a98334f1f5918dee8069d2fe (patch)
tree	f4d189e9443b21ba39c51d79d823e1b71ada741b
parent	9c027f11a09f5150ca71031b0919c2687113b461 (diff)
download	glibc-0704d322185cac23a98334f1f5918dee8069d2fe.zip glibc-0704d322185cac23a98334f1f5918dee8069d2fe.tar.gz glibc-0704d322185cac23a98334f1f5918dee8069d2fe.tar.bz2