diff options
author | Andrew Senkevich <andrew.senkevich@intel.com> | 2015-06-23 19:21:50 +0300 |
---|---|---|
committer | Andrew Senkevich <andrew.senkevich@intel.com> | 2015-06-23 19:21:50 +0300 |
commit | 5872b8352a8b6c0aa49c4e9f82bbda32becc5f02 (patch) | |
tree | 87f9545391602848e21061e6d3d7a808672beef7 | |
parent | 718d34a309493f8697ff9a8fefcbacbba12a2ccd (diff) | |
download | glibc-5872b8352a8b6c0aa49c4e9f82bbda32becc5f02.zip glibc-5872b8352a8b6c0aa49c4e9f82bbda32becc5f02.tar.gz glibc-5872b8352a8b6c0aa49c4e9f82bbda32becc5f02.tar.bz2 |
Combine the data tables for the x86_64 vector functions sin, cos and sincos into a single shared table (__svml_d_trig_data).
* sysdeps/x86_64/fpu/Makefile (libmvec-support): Fixed files list.
* sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S: Renamed variable
and included header.
* sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise.
* sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_trig_data.S: New file.
* sysdeps/x86_64/fpu/svml_d_trig_data.h: Likewise.
* sysdeps/x86_64/fpu/svml_d_cos2_core.S: Removed unneeded include.
* sysdeps/x86_64/fpu/svml_d_cos4_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_cos8_core.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_cos_data.S: Removed file.
* sysdeps/x86_64/fpu/svml_d_cos_data.h: Likewise.
* sysdeps/x86_64/fpu/svml_d_sin_data.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sin_data.h: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos_data.S: Likewise.
* sysdeps/x86_64/fpu/svml_d_sincos_data.h: Likewise.
21 files changed, 198 insertions, 439 deletions
@@ -1,3 +1,28 @@ +2015-06-23 Andrew Senkevich <andrew.senkevich@intel.com> + + * sysdeps/x86_64/fpu/Makefile (libmvec-support): Fixed files list. + * sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S: Renamed variable + and included header. + * sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S: Likewise. + * sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_trig_data.S: New file. + * sysdeps/x86_64/fpu/svml_d_trig_data.h: Likewise. + * sysdeps/x86_64/fpu/svml_d_cos2_core.S: Removed unneeded include. + * sysdeps/x86_64/fpu/svml_d_cos4_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_cos8_core.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_cos_data.S: Removed file. + * sysdeps/x86_64/fpu/svml_d_cos_data.h: Likewise. + * sysdeps/x86_64/fpu/svml_d_sin_data.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_sin_data.h: Likewise. + * sysdeps/x86_64/fpu/svml_d_sincos_data.S: Likewise. + * sysdeps/x86_64/fpu/svml_d_sincos_data.h: Likewise. 
+ 2015-06-23 Joseph Myers <joseph@codesourcery.com> [BZ #18371] diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile index c6912cb..c377100 100644 --- a/sysdeps/x86_64/fpu/Makefile +++ b/sysdeps/x86_64/fpu/Makefile @@ -1,14 +1,14 @@ ifeq ($(subdir),mathvec) libmvec-support += svml_d_cos2_core svml_d_cos4_core_avx \ svml_d_cos4_core svml_d_cos8_core \ - svml_d_cos_data svml_d_sin2_core svml_d_sin4_core_avx \ - svml_d_sin4_core svml_d_sin8_core svml_d_sin_data \ + svml_d_sin2_core svml_d_sin4_core_avx \ + svml_d_sin4_core svml_d_sin8_core svml_d_trig_data \ svml_s_cosf4_core svml_s_cosf8_core_avx \ svml_s_cosf8_core svml_s_cosf16_core svml_s_cosf_data \ svml_s_sinf4_core svml_s_sinf8_core_avx \ svml_s_sinf8_core svml_s_sinf16_core svml_s_sinf_data \ svml_d_sincos2_core svml_d_sincos4_core_avx \ - svml_d_sincos4_core svml_d_sincos8_core svml_d_sincos_data \ + svml_d_sincos4_core svml_d_sincos8_core \ svml_d_log2_core svml_d_log4_core_avx svml_d_log4_core \ svml_d_log8_core svml_d_log_data svml_s_logf4_core \ svml_s_logf8_core_avx svml_s_logf8_core svml_s_logf16_core \ diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S index 11348a3..4420edc 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include "svml_d_cos_data.h" +#include "svml_d_trig_data.h" .text ENTRY (_ZGVbN2v_cos_sse4) @@ -41,7 +41,7 @@ ENTRY (_ZGVbN2v_cos_sse4) andq $-64, %rsp subq $320, %rsp movaps %xmm0, %xmm3 - movq __svml_dcos_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax movups __dHalfPI(%rax), %xmm2 /* ARGUMENT RANGE REDUCTION: diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S index f192ba0..9a776e7 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "svml_d_cos_data.h" +#include "svml_d_trig_data.h" .text ENTRY (_ZGVdN4v_cos_avx2) @@ -41,7 +41,7 @@ ENTRY (_ZGVdN4v_cos_avx2) cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $448, %rsp - movq __svml_dcos_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax vmovapd %ymm0, %ymm1 vmovupd __dInvPI(%rax), %ymm4 vmovupd __dRShifter(%rax), %ymm5 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S index 14695ec..b376155 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include "svml_d_cos_data.h" +#include "svml_d_trig_data.h" #include "svml_d_wrapper_impl.h" .text @@ -45,7 +45,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $1280, %rsp - movq __svml_dcos_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax /* R = X - N*Pi1 */ vmovaps %zmm0, %zmm7 @@ -259,7 +259,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $1280, %rsp - movq __svml_dcos_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax /* R = X - N*Pi1 */ vmovaps %zmm0, %zmm8 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S index 4b4d8be..3a1ccbf 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "svml_d_sin_data.h" +#include "svml_d_trig_data.h" .text ENTRY (_ZGVbN2v_sin_sse4) @@ -41,11 +41,11 @@ ENTRY (_ZGVbN2v_sin_sse4) andq $-64, %rsp subq $320, %rsp movaps %xmm0, %xmm5 - movq __svml_dsin_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax movups __dAbsMask(%rax), %xmm3 /* - * ARGUMENT RANGE REDUCTION: - * X' = |X| + ARGUMENT RANGE REDUCTION: + X' = |X| */ movaps %xmm3, %xmm4 @@ -90,31 +90,31 @@ ENTRY (_ZGVbN2v_sin_sse4) subpd %xmm1, %xmm0 /* - * POLYNOMIAL APPROXIMATION: - * R2 = R*R + POLYNOMIAL APPROXIMATION: + R2 = R*R */ movaps %xmm0, %xmm1 mulpd %xmm0, %xmm1 /* R = R^SignRes : update sign of reduced argument */ xorps %xmm2, %xmm0 - movups __dC7(%rax), %xmm2 + movups __dC7_sin(%rax), %xmm2 mulpd %xmm1, %xmm2 - addpd __dC6(%rax), %xmm2 + addpd __dC6_sin(%rax), %xmm2 mulpd %xmm1, %xmm2 - addpd __dC5(%rax), %xmm2 + addpd __dC5_sin(%rax), %xmm2 mulpd %xmm1, %xmm2 - addpd __dC4(%rax), %xmm2 + addpd __dC4_sin(%rax), %xmm2 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */ mulpd %xmm1, %xmm2 
- addpd __dC3(%rax), %xmm2 + addpd __dC3_sin(%rax), %xmm2 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */ mulpd %xmm1, %xmm2 - addpd __dC2(%rax), %xmm2 + addpd __dC2_sin(%rax), %xmm2 mulpd %xmm1, %xmm2 - addpd __dC1(%rax), %xmm2 + addpd __dC1_sin(%rax), %xmm2 mulpd %xmm2, %xmm1 /* Poly = Poly*R + R */ @@ -122,8 +122,8 @@ ENTRY (_ZGVbN2v_sin_sse4) addpd %xmm1, %xmm0 /* - * RECONSTRUCTION: - * Final sign setting: Res = Poly^SignX + RECONSTRUCTION: + Final sign setting: Res = Poly^SignX */ xorps %xmm3, %xmm0 testl %ecx, %ecx diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S index e7e60d4..6bf8b32 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "svml_d_sin_data.h" +#include "svml_d_trig_data.h" .text ENTRY (_ZGVdN4v_sin_avx2) @@ -40,7 +40,7 @@ ENTRY (_ZGVdN4v_sin_avx2) cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $448, %rsp - movq __svml_dsin_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax vmovdqa %ymm0, %ymm4 vmovupd __dAbsMask(%rax), %ymm2 vmovupd __dInvPI(%rax), %ymm6 @@ -80,17 +80,17 @@ ENTRY (_ZGVdN4v_sin_avx2) /* R = R^SignRes : update sign of reduced argument */ vxorpd %ymm5, %ymm1, %ymm6 - vmovupd __dC7(%rax), %ymm1 - vfmadd213pd __dC6(%rax), %ymm0, %ymm1 - vfmadd213pd __dC5(%rax), %ymm0, %ymm1 - vfmadd213pd __dC4(%rax), %ymm0, %ymm1 + vmovupd __dC7_sin(%rax), %ymm1 + vfmadd213pd __dC6_sin(%rax), %ymm0, %ymm1 + vfmadd213pd __dC5_sin(%rax), %ymm0, %ymm1 + vfmadd213pd __dC4_sin(%rax), %ymm0, %ymm1 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */ - vfmadd213pd __dC3(%rax), %ymm0, %ymm1 + vfmadd213pd __dC3_sin(%rax), %ymm0, %ymm1 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */ - vfmadd213pd __dC2(%rax), %ymm0, %ymm1 - vfmadd213pd __dC1(%rax), %ymm0, %ymm1 + vfmadd213pd __dC2_sin(%rax), %ymm0, %ymm1 + vfmadd213pd __dC1_sin(%rax), 
%ymm0, %ymm1 /* SignX - sign bit of X */ vandnpd %ymm4, %ymm2, %ymm7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S index c01ad1f..422f6e8 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "svml_d_sin_data.h" +#include "svml_d_trig_data.h" #include "svml_d_wrapper_impl.h" .text @@ -45,18 +45,18 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $1280, %rsp - movq __svml_dsin_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax movq $-1, %rdx vmovups __dAbsMask(%rax), %zmm6 vmovups __dInvPI(%rax), %zmm1 /* - * ARGUMENT RANGE REDUCTION: - * X' = |X| + ARGUMENT RANGE REDUCTION: + X' = |X| */ vpandq %zmm6, %zmm0, %zmm12 vmovups __dPI1_FMA(%rax), %zmm2 - vmovups __dC7(%rax), %zmm7 + vmovups __dC7_sin(%rax), %zmm7 /* SignX - sign bit of X */ vpandnq %zmm0, %zmm6, %zmm11 @@ -86,31 +86,31 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin vfnmadd132pd __dPI3_FMA(%rax), %zmm3, %zmm4 /* - * POLYNOMIAL APPROXIMATION: - * R2 = R*R + POLYNOMIAL APPROXIMATION: + R2 = R*R */ vmulpd %zmm4, %zmm4, %zmm8 /* R = R^SignRes : update sign of reduced argument */ vpxorq %zmm5, %zmm4, %zmm9 - vfmadd213pd __dC6(%rax), %zmm8, %zmm7 - vfmadd213pd __dC5(%rax), %zmm8, %zmm7 - vfmadd213pd __dC4(%rax), %zmm8, %zmm7 + vfmadd213pd __dC6_sin(%rax), %zmm8, %zmm7 + vfmadd213pd __dC5_sin(%rax), %zmm8, %zmm7 + vfmadd213pd __dC4_sin(%rax), %zmm8, %zmm7 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */ - vfmadd213pd __dC3(%rax), %zmm8, %zmm7 + vfmadd213pd __dC3_sin(%rax), %zmm8, %zmm7 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */ - vfmadd213pd __dC2(%rax), %zmm8, %zmm7 - vfmadd213pd __dC1(%rax), %zmm8, %zmm7 + vfmadd213pd __dC2_sin(%rax), %zmm8, %zmm7 + vfmadd213pd __dC1_sin(%rax), %zmm8, %zmm7 vmulpd %zmm8, %zmm7, %zmm10 /* Poly = Poly*R + R 
*/ vfmadd213pd %zmm9, %zmm9, %zmm10 /* - * RECONSTRUCTION: - * Final sign setting: Res = Poly^SignX + RECONSTRUCTION: + Final sign setting: Res = Poly^SignX */ vpxorq %zmm11, %zmm10, %zmm1 testl %ecx, %ecx @@ -260,13 +260,13 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $1280, %rsp - movq __svml_dsin_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax vpbroadcastq .L_2il0floatpacket.14(%rip), %zmm14 vmovups __dAbsMask(%rax), %zmm7 vmovups __dInvPI(%rax), %zmm2 vmovups __dRShifter(%rax), %zmm1 vmovups __dPI1_FMA(%rax), %zmm3 - vmovups __dC7(%rax), %zmm8 + vmovups __dC7_sin(%rax), %zmm8 /* ARGUMENT RANGE REDUCTION: @@ -305,16 +305,16 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin /* R = R^SignRes : update sign of reduced argument */ vxorpd %zmm6, %zmm5, %zmm10 - vfmadd213pd __dC6(%rax), %zmm9, %zmm8 - vfmadd213pd __dC5(%rax), %zmm9, %zmm8 - vfmadd213pd __dC4(%rax), %zmm9, %zmm8 + vfmadd213pd __dC6_sin(%rax), %zmm9, %zmm8 + vfmadd213pd __dC5_sin(%rax), %zmm9, %zmm8 + vfmadd213pd __dC4_sin(%rax), %zmm9, %zmm8 /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))) */ - vfmadd213pd __dC3(%rax), %zmm9, %zmm8 + vfmadd213pd __dC3_sin(%rax), %zmm9, %zmm8 /* Poly = R2*(C1+R2*(C2+R2*Poly)) */ - vfmadd213pd __dC2(%rax), %zmm9, %zmm8 - vfmadd213pd __dC1(%rax), %zmm9, %zmm8 + vfmadd213pd __dC2_sin(%rax), %zmm9, %zmm8 + vfmadd213pd __dC1_sin(%rax), %zmm9, %zmm8 vmulpd %zmm9, %zmm8, %zmm11 /* Poly = Poly*R + R */ diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S index 0b37c7c..b504d1d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include "svml_d_sincos_data.h" +#include "svml_d_trig_data.h" .text ENTRY (_ZGVbN2vvv_sincos_sse4) @@ -43,7 +43,7 @@ ENTRY (_ZGVbN2vvv_sincos_sse4) cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $320, %rsp - movq __svml_dsincos_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax movups %xmm11, 160(%rsp) movups %xmm12, 144(%rsp) movups __dSignMask(%rax), %xmm11 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S index ec1ccc6..dca5604 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "svml_d_sincos_data.h" +#include "svml_d_trig_data.h" .text ENTRY (_ZGVdN4vvv_sincos_avx2) @@ -43,7 +43,7 @@ ENTRY (_ZGVdN4vvv_sincos_avx2) cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $448, %rsp - movq __svml_dsincos_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax vmovups %ymm14, 288(%rsp) vmovups %ymm8, 352(%rsp) vmovupd __dSignMask(%rax), %ymm6 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S index fcbf393..e838832 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include "svml_d_sincos_data.h" +#include "svml_d_trig_data.h" #include "svml_d_wrapper_impl.h" /* @@ -47,7 +47,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $1344, %rsp - movq __svml_dsincos_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax vmovaps %zmm0, %zmm4 movq $-1, %rdx vmovups __dSignMask(%rax), %zmm12 @@ -317,7 +317,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $1344, %rsp - movq __svml_dsincos_data@GOTPCREL(%rip), %rax + movq __svml_d_trig_data@GOTPCREL(%rip), %rax vmovaps %zmm0, %zmm8 vmovups __dSignMask(%rax), %zmm4 vmovups __dInvPI(%rax), %zmm9 diff --git a/sysdeps/x86_64/fpu/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/svml_d_cos2_core.S index a1c5bee..a26beca 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_cos2_core.S @@ -17,7 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "svml_d_cos_data.h" #include "svml_d_wrapper_impl.h" .text diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/svml_d_cos4_core.S index a505b44..35996b7 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_cos4_core.S @@ -17,7 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "svml_d_cos_data.h" #include "svml_d_wrapper_impl.h" .text diff --git a/sysdeps/x86_64/fpu/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/svml_d_cos8_core.S index c7507db..1ba10e8 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_cos8_core.S @@ -17,7 +17,6 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include "svml_d_cos_data.h" #include "svml_d_wrapper_impl.h" .text diff --git a/sysdeps/x86_64/fpu/svml_d_cos_data.h b/sysdeps/x86_64/fpu/svml_d_cos_data.h deleted file mode 100644 index 4d28e6e..0000000 --- a/sysdeps/x86_64/fpu/svml_d_cos_data.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Offsets for data table for vectorized cos. - Copyright (C) 2014-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef D_COS_DATA_H -#define D_COS_DATA_H - -#define __dAbsMask 0 -#define __dRangeVal 64 -#define __dHalfPI 128 -#define __dInvPI 192 -#define __dRShifter 256 -#define __dOneHalf 320 -#define __dPI1 384 -#define __dPI2 448 -#define __dPI3 512 -#define __dPI4 576 -#define __dPI1_FMA 640 -#define __dPI2_FMA 704 -#define __dPI3_FMA 768 -#define __dC1 832 -#define __dC2 896 -#define __dC3 960 -#define __dC4 1024 -#define __dC5 1088 -#define __dC6 1152 -#define __dC7 1216 -#define __dAbsMask_la 1280 -#define __dInvPI_la 1344 -#define __dRShifter_la 1408 -#define __dRShifterm5_la 1472 -#define __dRXmax_la 1536 - -#endif diff --git a/sysdeps/x86_64/fpu/svml_d_sin_data.S b/sysdeps/x86_64/fpu/svml_d_sin_data.S deleted file mode 100644 index e5e1ff7..0000000 --- a/sysdeps/x86_64/fpu/svml_d_sin_data.S +++ /dev/null @@ -1,82 +0,0 @@ -/* Data for vectorized sin. 
- Copyright (C) 2014-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include "svml_d_sin_data.h" - - .section .rodata, "a" - .align 64 - -/* Data table for vector implementations of function sin. - The table may contain polynomial, reduction, lookup coefficients and other macro_names - obtained through different methods of research and experimental work. 
*/ - - .globl __svml_dsin_data -__svml_dsin_data: - -/* General purpose constants: - absolute value mask */ -double_vector __dAbsMask 0x7fffffffffffffff - -/* working range threshold */ -double_vector __dRangeVal 0x4170000000000000 - -/* 1/PI */ -double_vector __dInvPI 0x3fd45f306dc9c883 - -/* right-shifter constant */ -double_vector __dRShifter 0x4338000000000000 - -/* 0.0 */ -double_vector __dZero 0x0000000000000000 - -/* -0.0 */ -double_vector __lNZero 0x8000000000000000 - -/* Range reduction PI-based constants: - PI high part */ -double_vector __dPI1 0x400921fb40000000 - -/* PI mid part 1 */ -double_vector __dPI2 0x3e84442d00000000 - -/* PI mid part 2 */ -double_vector __dPI3 0x3d08469880000000 - -/* PI low part */ -double_vector __dPI4 0x3b88cc51701b839a - -/* Range reduction PI-based constants if FMA available: - PI high part (FMA available) */ -double_vector __dPI1_FMA 0x400921fb54442d18 - -/* PI mid part (FMA available) */ -double_vector __dPI2_FMA 0x3ca1a62633145c06 - -/* PI low part (FMA available) */ -double_vector __dPI3_FMA 0x395c1cd129024e09 - -/* Polynomial coefficients (relative error 2^(-52.115)): */ -double_vector __dC1 0xbfc55555555554a8 -double_vector __dC2 0x3f8111111110a573 -double_vector __dC3 0xbf2a01a019a659dd -double_vector __dC4 0x3ec71de3806add1a -double_vector __dC5 0xbe5ae6355aaa4a53 -double_vector __dC6 0x3de60e6bee01d83e -double_vector __dC7 0xbd69f1517e9f65f0 - .type __svml_dsin_data,@object - .size __svml_dsin_data,.-__svml_dsin_data diff --git a/sysdeps/x86_64/fpu/svml_d_sin_data.h b/sysdeps/x86_64/fpu/svml_d_sin_data.h deleted file mode 100644 index 76ab508..0000000 --- a/sysdeps/x86_64/fpu/svml_d_sin_data.h +++ /dev/null @@ -1,53 +0,0 @@ -/* Offsets for data table for vectorized sin. - Copyright (C) 2014-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef D_SIN_DATA_H -#define D_SIN_DATA_H - -/* Offsets for data table */ -#define __dAbsMask 0 -#define __dRangeVal 64 -#define __dInvPI 128 -#define __dRShifter 192 -#define __dZero 256 -#define __lNZero 320 -#define __dPI1 384 -#define __dPI2 448 -#define __dPI3 512 -#define __dPI4 576 -#define __dPI1_FMA 640 -#define __dPI2_FMA 704 -#define __dPI3_FMA 768 -#define __dC1 832 -#define __dC2 896 -#define __dC3 960 -#define __dC4 1024 -#define __dC5 1088 -#define __dC6 1152 -#define __dC7 1216 - -.macro double_vector offset value -.if .-__svml_dsin_data != \offset -.err -.endif -.rept 8 -.quad \value -.endr -.endm - -#endif diff --git a/sysdeps/x86_64/fpu/svml_d_sincos_data.S b/sysdeps/x86_64/fpu/svml_d_sincos_data.S deleted file mode 100644 index 6749ba6..0000000 --- a/sysdeps/x86_64/fpu/svml_d_sincos_data.S +++ /dev/null @@ -1,111 +0,0 @@ -/* Data for function sincos. - Copyright (C) 2014-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include "svml_d_sincos_data.h" - - .section .rodata, "a" - .align 64 - -/* Data table for vector implementations of function sincos. - The table may contain polynomial, reduction, lookup coefficients and - other coefficients obtained through different methods of research and - experimental work. */ - - .globl __svml_dsincos_data -__svml_dsincos_data: - -/* General purpose constants: - dSignMask */ -double_vector __dSignMask 0x8000000000000000 - -/* dAbsMask */ -double_vector __dAbsMask 0x7fffffffffffffff - -/* lRangeVal */ -double_vector __dRangeVal 0x4160000000000000 - -/* HalfPI */ -double_vector __dHalfPI 0x3ff921fb54442d18 - -/* InvPI */ -double_vector __dInvPI 0x3fd45f306dc9c883 - -/* dRShifter */ -double_vector __dRShifter 0x4338000000000000 - -/* dOneHalf */ -double_vector __dOneHalf 0x3fe0000000000000 - -/* Range reduction PI-based constants: - PI1 */ -double_vector __dPI1 0x400921fb40000000 - -/* PI2 */ -double_vector __dPI2 0x3e84442d00000000 - -/* PI3 */ -double_vector __dPI3 0x3d08469880000000 - -/* PI4 */ -double_vector __dPI4 0x3b88cc51701b839a - -/* Range reduction PI-based constants if FMA available: - PI1_FMA */ -double_vector __dPI1_FMA 0x400921fb54442d18 - -/* PI2_FMA */ -double_vector __dPI2_FMA 0x3ca1a62633145c06 - -/* PI3_FMA */ -double_vector __dPI3_FMA 0x395c1cd129024e09 - -/* HalfPI1 */ -double_vector __dHalfPI1 0x3ff921fc00000000 - -/* HalfPI2 */ -double_vector __dHalfPI2 0xbea5777a00000000 - -/* HalfPI3 */ -double_vector __dHalfPI3 0xbd473dcc00000000 - -/* HalfPI4 */ -double_vector __dHalfPI4 0x3bf898cc51701b84 - -/* Polynomial 
coefficients (relative error 2^(-52.115)): - C1 */ -double_vector __dC1 0xbfc55555555554a7 - -/* C2 */ -double_vector __dC2 0x3f8111111110a4a8 - -/* C3 */ -double_vector __dC3 0xbf2a01a019a5b86d - -/* C4 */ -double_vector __dC4 0x3ec71de38030fea0 - -/* C5 */ -double_vector __dC5 0xbe5ae63546002231 - -/* C6 */ -double_vector __dC6 0x3de60e6857a2f220 - -/* C7 */ -double_vector __dC7 0xbd69f0d60811aac8 - .type __svml_dsincos_data,@object - .size __svml_dsincos_data,.-__svml_dsincos_data diff --git a/sysdeps/x86_64/fpu/svml_d_sincos_data.h b/sysdeps/x86_64/fpu/svml_d_sincos_data.h deleted file mode 100644 index cc316dc..0000000 --- a/sysdeps/x86_64/fpu/svml_d_sincos_data.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Offsets for data table for function sincos. - Copyright (C) 2014-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#ifndef D_SINCOS_DATA_H -#define D_SINCOS_DATA_H - -#define __dSignMask 0 -#define __dAbsMask 64 -#define __dRangeVal 128 -#define __dHalfPI 192 -#define __dInvPI 256 -#define __dRShifter 320 -#define __dOneHalf 384 -#define __dPI1 448 -#define __dPI2 512 -#define __dPI3 576 -#define __dPI4 640 -#define __dPI1_FMA 704 -#define __dPI2_FMA 768 -#define __dPI3_FMA 832 -#define __dHalfPI1 896 -#define __dHalfPI2 960 -#define __dHalfPI3 1024 -#define __dHalfPI4 1088 -#define __dC1 1152 -#define __dC2 1216 -#define __dC3 1280 -#define __dC4 1344 -#define __dC5 1408 -#define __dC6 1472 -#define __dC7 1536 - -.macro double_vector offset value -.if .-__svml_dsincos_data != \offset -.err -.endif -.rept 8 -.quad \value -.endr -.endm - -#endif diff --git a/sysdeps/x86_64/fpu/svml_d_cos_data.S b/sysdeps/x86_64/fpu/svml_d_trig_data.S index c9bfd63..d3b3059 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos_data.S +++ b/sysdeps/x86_64/fpu/svml_d_trig_data.S @@ -1,4 +1,4 @@ -/* Data for vectorized cos. +/* Data for vectorized sin, cos, sincos. Copyright (C) 2014-2015 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,27 +16,18 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include "svml_d_cos_data.h" - -.macro double_vector offset value -.if .-__svml_dcos_data != \offset -.err -.endif -.rept 8 -.quad \value -.endr -.endm +#include "svml_d_trig_data.h" .section .rodata, "a" .align 64 -/* Data table for vector implementations of function cos. +/* Data table for vector implementations. The table may contain polynomial, reduction, lookup coefficients and other constants obtained through different methods of research and experimental work. 
*/ - .globl __svml_dcos_data -__svml_dcos_data: + .globl __svml_d_trig_data +__svml_d_trig_data: /* General purpose constants: absolute value mask @@ -46,6 +37,9 @@ double_vector __dAbsMask 0x7fffffffffffffff /* working range threshold */ double_vector __dRangeVal 0x4160000000000000 +/* working range threshold */ +double_vector __dRangeVal_sin 0x4170000000000000 + /* PI/2 */ double_vector __dHalfPI 0x3ff921fb54442d18 @@ -55,6 +49,12 @@ double_vector __dInvPI 0x3fd45f306dc9c883 /* right-shifter constant */ double_vector __dRShifter 0x4338000000000000 +/* 0.0 */ +double_vector __dZero 0x0000000000000000 + +/* -0.0 */ +double_vector __lNZero 0x8000000000000000 + /* 0.5 */ double_vector __dOneHalf 0x3fe0000000000000 @@ -83,6 +83,18 @@ double_vector __dPI2_FMA 0x3ca1a62633145c06 /* PI low part (FMA available) */ double_vector __dPI3_FMA 0x395c1cd129024e09 +/* HalfPI1 */ +double_vector __dHalfPI1 0x3ff921fc00000000 + +/* HalfPI2 */ +double_vector __dHalfPI2 0xbea5777a00000000 + +/* HalfPI3 */ +double_vector __dHalfPI3 0xbd473dcc00000000 + +/* HalfPI4 */ +double_vector __dHalfPI4 0x3bf898cc51701b84 + /* Polynomial coefficients (relative error 2^(-52.115)): */ double_vector __dC1 0xbfc55555555554a7 double_vector __dC2 0x3f8111111110a4a8 @@ -92,15 +104,19 @@ double_vector __dC5 0xbe5ae63546002231 double_vector __dC6 0x3de60e6857a2f220 double_vector __dC7 0xbd69f0d60811aac8 +/* Polynomial coefficients (relative error 2^(-52.115)): */ +double_vector __dC1_sin 0xbfc55555555554a8 +double_vector __dC2_sin 0x3f8111111110a573 +double_vector __dC3_sin 0xbf2a01a019a659dd +double_vector __dC4_sin 0x3ec71de3806add1a +double_vector __dC5_sin 0xbe5ae6355aaa4a53 +double_vector __dC6_sin 0x3de60e6bee01d83e +double_vector __dC7_sin 0xbd69f1517e9f65f0 + /* Additional constants: absolute value mask */ -double_vector __dAbsMask_la 0x7fffffffffffffff - -/* 1/PI */ -double_vector __dInvPI_la 0x3fd45f306dc9c883 - /* right-shifer for low accuracy version */ double_vector __dRShifter_la 
0x4330000000000000 @@ -110,5 +126,5 @@ double_vector __dRShifterm5_la 0x432fffffffffffff /* right-shifer with low mask for low accuracy version */ double_vector __dRXmax_la 0x43300000007ffffe - .type __svml_dcos_data,@object - .size __svml_dcos_data,.-__svml_dcos_data + .type __svml_d_trig_data,@object + .size __svml_d_trig_data,.-__svml_d_trig_data diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.h b/sysdeps/x86_64/fpu/svml_d_trig_data.h new file mode 100644 index 0000000..1395337 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_trig_data.h @@ -0,0 +1,72 @@ +/* Offsets for data table for vectorized sin, cos, sincos. + Copyright (C) 2014-2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef D_TRIG_DATA_H +#define D_TRIG_DATA_H + +#define __dAbsMask 0 +#define __dRangeVal 64 +#define __dRangeVal_sin 64*2 +#define __dHalfPI 64*3 +#define __dInvPI 64*4 +#define __dRShifter 64*5 +#define __dZero 64*6 +#define __lNZero 64*7 +#define __dOneHalf 64*8 +#define __dPI1 64*9 +#define __dPI2 64*10 +#define __dPI3 64*11 +#define __dPI4 64*12 +#define __dPI1_FMA 64*13 +#define __dPI2_FMA 64*14 +#define __dPI3_FMA 64*15 +#define __dHalfPI1 64*16 +#define __dHalfPI2 64*17 +#define __dHalfPI3 64*18 +#define __dHalfPI4 64*19 +#define __dC1 64*20 +#define __dC2 64*21 +#define __dC3 64*22 +#define __dC4 64*23 +#define __dC5 64*24 +#define __dC6 64*25 +#define __dC7 64*26 +#define __dC1_sin 64*27 +#define __dC2_sin 64*28 +#define __dC3_sin 64*29 +#define __dC4_sin 64*30 +#define __dC5_sin 64*31 +#define __dC6_sin 64*32 +#define __dC7_sin 64*33 +#define __dRShifter_la 64*34 +#define __dRShifterm5_la 64*35 +#define __dRXmax_la 64*36 +#define __dAbsMask_la __dAbsMask +#define __dInvPI_la __dInvPI +#define __dSignMask __lNZero + +.macro double_vector offset value +.if .-__svml_d_trig_data != \offset +.err +.endif +.rept 8 +.quad \value +.endr +.endm + +#endif |