aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bert Tenjy <bert.tenjy@gmail.com> 2019-03-07 07:22:34 +0000
committer Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> 2020-02-19 17:27:29 -0300
commit ea461ef2fef8fada01b27839a3ce761053f5d1ea (patch)
tree d3375cb635def5f2091651c957962b097b8eff5d
parent dbe5898b5f3b76c205fea9094afb7f1defd49b66 (diff)
download glibc-ea461ef2fef8fada01b27839a3ce761053f5d1ea.zip
glibc-ea461ef2fef8fada01b27839a3ce761053f5d1ea.tar.gz
glibc-ea461ef2fef8fada01b27839a3ce761053f5d1ea.tar.bz2
PPC64: Add libmvec SIMD single-precision sine function [BZ #24206]
Implements single-precision vector sine function. The polynomial sine-approximating algorithm is adapted for PPC64 from x86_64 [commit #2a8c2c7b33]. The patch has been tested on PPC64/POWER8 Little Endian and Big Endian. Testing uses the framework created for libmvec on x86_64 which runs tests on issuing 'make check'. Tests of the new vector single-precision sine function all pass. Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
-rw-r--r-- NEWS 1
-rw-r--r-- sysdeps/powerpc/fpu/libm-test-ulps 3
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/Versions 2
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile 5
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c 113
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h 28
-rw-r--r-- sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist 1
8 files changed, 139 insertions, 15 deletions
diff --git a/NEWS b/NEWS
index d17067c..4a7e434 100644
--- a/NEWS
+++ b/NEWS
@@ -263,6 +263,7 @@ Major new features:
- double-precision cosine: cos
- single-precision cosine: cosf
- double-precision sine: sin
+ - single-precision sine: sinf
GCC support for auto-vectorization of functions on PPC64 is not yet
available. Until that is done, the new vector math functions are
diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
index e72452e..32a7a84 100644
--- a/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/sysdeps/powerpc/fpu/libm-test-ulps
@@ -2573,6 +2573,9 @@ ldouble: 5
Function: "sin_vlen2":
double: 2
+Function: "sin_vlen4":
+float: 1
+
Function: "sincos":
double: 1
float: 1
diff --git a/sysdeps/powerpc/powerpc64/fpu/Versions b/sysdeps/powerpc/powerpc64/fpu/Versions
index 4852a27..f7c8fd8 100644
--- a/sysdeps/powerpc/powerpc64/fpu/Versions
+++ b/sysdeps/powerpc/powerpc64/fpu/Versions
@@ -1,5 +1,5 @@
libmvec {
GLIBC_2.30 {
- _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin;
+ _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin; _ZGVbN4v_sinf;
}
}
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
index fe4cef9..b821641 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
@@ -1,9 +1,10 @@
ifeq ($(subdir),mathvec)
libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx \
- vec_d_sin2_vsx
+ vec_d_sin2_vsx vec_s_sinf4_vsx
CFLAGS-vec_d_cos2_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_s_cosf4_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_d_sin2_vsx.c += -mabi=altivec -maltivec -mvsx
+CFLAGS-vec_s_sinf4_vsx.c += -mabi=altivec -maltivec -mvsx
endif
# Variables for libmvec tests.
@@ -12,7 +13,7 @@ ifeq ($(build-mathvec),yes)
libmvec-tests += double-vlen2 float-vlen4
double-vlen2-funcs = cos sin
-float-vlen4-funcs = cos
+float-vlen4-funcs = cos sin
double-vlen2-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX
float-vlen4-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c
index f099990..44f94d1 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c
@@ -22,3 +22,4 @@
#define VEC_TYPE vector float
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf)
+VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c
new file mode 100644
index 0000000..379fcca
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c
@@ -0,0 +1,113 @@
+/* Function sinf vectorized with VSX SIMD.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include "vec_s_trig_data.h"
+
+vector float
+_ZGVbN4v_sinf (vector float x)
+{
+ /* Compute sinf for each of the four float elements of X. */
+ /*
+ ALGORITHM DESCRIPTION:
+
+ 1) Range reduction to [-Pi/2; +Pi/2] interval
+ a) Grab sign from source argument and save it.
+ b) Remove sign (absolute value)
+ c) Get Y, the count of Pi multiples in X, by 1/Pi multiplication
+ d) Add "Right Shifter" value
+ e) Treat obtained value as integer for destination sign setting.
+ Shift first bit of this value to the last (sign) position
+ f) Change destination sign if source sign is negative
+ using XOR operation.
+ g) Subtract "Right Shifter" value
+ h) Subtract Y*PI from X argument, where PI is divided into 3 parts:
+ X = X - Y*PI1 - Y*PI2 - Y*PI3;
+ 2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
+ a) Calculate X^2 = X * X
+ b) Calculate polynomial:
+ R = X + X * X^2 * (A3 + X^2 * (A5 + ......
+ 3) Destination sign setting
+ a) Set shifted destination sign using XOR operation:
+ R = XOR( R, S ). */
+
+
+ /* Remove sign of input argument: X'=|X|. */
+ vector float abs_x = vec_abs (x);
+
+ /* Scale by 1/Pi to count Pi multiples. Add "Right Shifter" value.
+ Y = X'*InvPi + RS. */
+ vector float y = (abs_x * __s_inv_pi) + __s_rshifter;
+
+ /* N = Y - RS : right shifter sub. */
+ vector float n = y - __s_rshifter;
+
+ /* SignRes = Y<<31 : shift LSB to MSB place for result sign. */
+ vector float sign_res = (vector float)
+ vec_sl ((vector signed int) y, (vector unsigned int) vec_splats (31));
+
+ /* Subtract N*PI from X' argument, where PI is divided into 3 parts. */
+ /* R = X' - N*PI1 - N*PI2 - N*PI3. */
+ vector float r = abs_x - (n * __s_pi1_fma);
+
+ /* R = R - N*Pi2. */
+ r = r - (n * __s_pi2_fma);
+
+ /* R = R - N*Pi3. */
+ r = r - (n * __s_pi3_fma);
+
+ /* Flag elements with X' > RangeVal for the large arguments path. */
+ vector bool int large_in = vec_cmpgt (abs_x, __s_rangeval);
+
+ /* Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval). */
+
+ /* R2 = R*R. */
+ vector float r2 = r * r;
+
+ /* Save source sign bit: NegSign = X & ~AbsMask. */
+ vector float neg_sign = vec_andc (x, __s_abs_mask);
+ /* Apply result sign to reduced argument: Res = XOR( R, SignRes ). */
+ vector float res = (vector float)
+ ((vector signed int) r ^ (vector signed int) sign_res);
+
+ /* Poly = Res + Res * R2*(A3+R2*(A5+R2*(A7+R2*A9))). */
+ vector float poly = r2 * __s_a9_fma + __s_a7_fma;
+ poly = poly * r2 + __s_a5_fma;
+ poly = poly * r2 + __s_a3;
+ poly = poly * r2 * res + res;
+
+/* Destination sign setting.
+ Flip sign if source was negative: Out = XOR( Poly, NegSign ). */
+ vector float out
+ = (vector float) ((vector int) poly ^ (vector int) neg_sign);
+ /* Large arguments path: recompute flagged elements with scalar sinf. */
+ if (large_in[0])
+ out[0] = sinf (x[0]);
+
+ if (large_in[1])
+ out[1] = sinf (x[1]);
+
+ if (large_in[2])
+ out[2] = sinf (x[2]);
+
+ if (large_in[3])
+ out[3] = sinf (x[3]);
+
+ return out;
+
+} /* Function _ZGVbN4v_sinf. */
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h
index 55c2856..5e1667a 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h
@@ -23,50 +23,54 @@
#include <altivec.h>
/* PI/2. */
-const vector float __s_half_pi =
+static const vector float __s_half_pi =
{ 0x1.921fb6p+0, 0x1.921fb6p+0, 0x1.921fb6p+0, 0x1.921fb6p+0 };
/* Inverse PI. */
-const vector float __s_inv_pi =
+static const vector float __s_inv_pi =
{ 0x1.45f306p-2, 0x1.45f306p-2, 0x1.45f306p-2, 0x1.45f306p-2 };
/* Right-shifter constant. */
-const vector float __s_rshifter =
+static const vector float __s_rshifter =
{ 0x1.8p+23, 0x1.8p+23, 0x1.8p+23, 0x1.8p+23 };
/* One-half. */
-const vector float __s_one_half =
+static const vector float __s_one_half =
{ 0x1p-1, 0x1p-1, 0x1p-1, 0x1p-1 };
/* Threshold for out-of-range values. */
-const vector float __s_rangeval =
+static const vector float __s_rangeval =
{ 0x1.388p+13, 0x1.388p+13, 0x1.388p+13, 0x1.388p+13 };
/* PI1, PI2, and PI3 when FMA is available
PI high part (when FMA available). */
-const vector float __s_pi1_fma =
+static const vector float __s_pi1_fma =
{ 0x1.921fb6p+1, 0x1.921fb6p+1, 0x1.921fb6p+1, 0x1.921fb6p+1 };
/* PI mid part (when FMA available). */
-const vector float __s_pi2_fma =
+static const vector float __s_pi2_fma =
{ -0x1.777a5cp-24, -0x1.777a5cp-24, -0x1.777a5cp-24, -0x1.777a5cp-24 };
/* PI low part (when FMA available). */
-const vector float __s_pi3_fma =
+static const vector float __s_pi3_fma =
{ -0x1.ee59dap-49, -0x1.ee59dap-49, -0x1.ee59dap-49, -0x1.ee59dap-49 };
/* Polynomial constants for work w/o FMA, relative error ~ 2^(-26.625). */
-const vector float __s_a3 =
+static const vector float __s_a3 =
{ -0x1.55554cp-3, -0x1.55554cp-3, -0x1.55554cp-3, -0x1.55554cp-3 };
/* Polynomial constants, work with FMA, relative error ~ 2^(-26.417). */
-const vector float __s_a5_fma =
+static const vector float __s_a5_fma =
{ 0x1.110edp-7, 0x1.110edp-7, 0x1.110edp-7, 0x1.110edp-7 };
-const vector float __s_a7_fma =
+static const vector float __s_a7_fma =
{ -0x1.9f6d9ep-13, -0x1.9f6d9ep-13, -0x1.9f6d9ep-13, -0x1.9f6d9ep-13 };
-const vector float __s_a9_fma =
+static const vector float __s_a9_fma =
{ 0x1.5d866ap-19, 0x1.5d866ap-19, 0x1.5d866ap-19, 0x1.5d866ap-19 };
+/* Absolute value mask. */
+static const vector bool int __s_abs_mask =
+{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
+
#endif /* S_TRIG_DATA_H. */
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
index a1a7f69..48a742c 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
@@ -1,3 +1,4 @@
GLIBC_2.30 _ZGVbN2v_cos F
GLIBC_2.30 _ZGVbN2v_sin F
GLIBC_2.30 _ZGVbN4v_cosf F
+GLIBC_2.30 _ZGVbN4v_sinf F