aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
authorAndrew Senkevich <andrew.senkevich@intel.com>2015-06-17 15:58:05 +0300
committerAndrew Senkevich <andrew.senkevich@intel.com>2015-06-17 15:58:05 +0300
commit9c02f663f6b387b3905b629ffe584c9abf2030dc (patch)
tree587a88eca7b4c3abd7c5482c07c7a35778025785 /sysdeps
parent774488f88aeed6b838fe29c3c7561433c242a3c9 (diff)
downloadglibc-9c02f663f6b387b3905b629ffe584c9abf2030dc.zip
glibc-9c02f663f6b387b3905b629ffe584c9abf2030dc.tar.gz
glibc-9c02f663f6b387b3905b629ffe584c9abf2030dc.tar.bz2
Vector exp for x86_64 and tests.
Here is implementation of vectorized exp containing SSE, AVX, AVX2 and AVX512 versions according to Vector ABI <https://groups.google.com/forum/#!topic/x86-64-abi/LmppCfN1rZ4>. * bits/libm-simd-decl-stubs.h: Added stubs for exp. * math/bits/mathcalls.h: Added exp declaration with __MATHCALL_VEC. * sysdeps/unix/sysv/linux/x86_64/libmvec.abilist: New versions added. * sysdeps/x86/fpu/bits/math-vector.h: Added SIMD declaration and asm redirections for exp. * sysdeps/x86_64/fpu/Makefile (libmvec-support): Added new files. * sysdeps/x86_64/fpu/Versions: New versions added. * sysdeps/x86_64/fpu/libm-test-ulps: Regenerated. * sysdeps/x86_64/fpu/multiarch/Makefile (libmvec-sysdep_routines): Added build of SSE, AVX2 and AVX512 IFUNC versions. * sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S: New file. * sysdeps/x86_64/fpu/svml_d_exp2_core.S: New file. * sysdeps/x86_64/fpu/svml_d_exp4_core.S: New file. * sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S: New file. * sysdeps/x86_64/fpu/svml_d_exp8_core.S: New file. * sysdeps/x86_64/fpu/svml_d_exp_data.S: New file. * sysdeps/x86_64/fpu/svml_d_exp_data.h: New file. * sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Added vector exp test. * sysdeps/x86_64/fpu/test-double-vlen2.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen4-avx2.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen4.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen8.c: Likewise. * NEWS: Mention addition of x86_64 vector exp.
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/libmvec.abilist4
-rw-r--r--sysdeps/x86/fpu/bits/math-vector.h6
-rw-r--r--sysdeps/x86_64/fpu/Makefile3
-rw-r--r--sysdeps/x86_64/fpu/Versions1
-rw-r--r--sysdeps/x86_64/fpu/libm-test-ulps12
-rw-r--r--sysdeps/x86_64/fpu/multiarch/Makefile3
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S38
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S225
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S38
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S212
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S39
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S456
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp2_core.S29
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp4_core.S29
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S25
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp8_core.S25
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp_data.S1088
-rw-r--r--sysdeps/x86_64/fpu/svml_d_exp_data.h52
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen2.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4-avx2.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen4.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c1
-rw-r--r--sysdeps/x86_64/fpu/test-double-vlen8.c1
26 files changed, 2291 insertions, 2 deletions
diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
index 3593edc..ff9431f 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
@@ -1,18 +1,21 @@
GLIBC_2.22
GLIBC_2.22 A
_ZGVbN2v_cos F
+ _ZGVbN2v_exp F
_ZGVbN2v_log F
_ZGVbN2v_sin F
_ZGVbN4v_cosf F
_ZGVbN4v_logf F
_ZGVbN4v_sinf F
_ZGVcN4v_cos F
+ _ZGVcN4v_exp F
_ZGVcN4v_log F
_ZGVcN4v_sin F
_ZGVcN8v_cosf F
_ZGVcN8v_logf F
_ZGVcN8v_sinf F
_ZGVdN4v_cos F
+ _ZGVdN4v_exp F
_ZGVdN4v_log F
_ZGVdN4v_sin F
_ZGVdN8v_cosf F
@@ -22,5 +25,6 @@ GLIBC_2.22
_ZGVeN16v_logf F
_ZGVeN16v_sinf F
_ZGVeN8v_cos F
+ _ZGVeN8v_exp F
_ZGVeN8v_log F
_ZGVeN8v_sin F
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h
index 5c3e492..9a353bc 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86/fpu/bits/math-vector.h
@@ -40,6 +40,8 @@
# define __DECL_SIMD_log __DECL_SIMD_x86_64
# undef __DECL_SIMD_logf
# define __DECL_SIMD_logf __DECL_SIMD_x86_64
+# undef __DECL_SIMD_exp
+# define __DECL_SIMD_exp __DECL_SIMD_x86_64
/* Workaround to exclude unnecessary symbol aliases in libmvec
while GCC creates the vector names based on scalar asm name.
@@ -53,6 +55,10 @@ __asm__ ("_ZGVbN4v___logf_finite = _ZGVbN4v_logf");
__asm__ ("_ZGVcN8v___logf_finite = _ZGVcN8v_logf");
__asm__ ("_ZGVdN8v___logf_finite = _ZGVdN8v_logf");
__asm__ ("_ZGVeN16v___logf_finite = _ZGVeN16v_logf");
+__asm__ ("_ZGVbN2v___exp_finite = _ZGVbN2v_exp");
+__asm__ ("_ZGVcN4v___exp_finite = _ZGVcN4v_exp");
+__asm__ ("_ZGVdN4v___exp_finite = _ZGVdN4v_exp");
+__asm__ ("_ZGVeN8v___exp_finite = _ZGVeN8v_exp");
# endif
#endif
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index b610e3f..bd6d693 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -10,7 +10,8 @@ libmvec-support += svml_d_cos2_core svml_d_cos4_core_avx \
svml_d_log2_core svml_d_log4_core_avx svml_d_log4_core \
svml_d_log8_core svml_d_log_data svml_s_logf4_core \
svml_s_logf8_core_avx svml_s_logf8_core svml_s_logf16_core \
- svml_s_logf_data \
+ svml_s_logf_data svml_d_exp2_core svml_d_exp4_core_avx \
+ svml_d_exp4_core svml_d_exp8_core svml_d_exp_data \
init-arch
endif
diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions
index ecd1b70..00e34e7 100644
--- a/sysdeps/x86_64/fpu/Versions
+++ b/sysdeps/x86_64/fpu/Versions
@@ -3,6 +3,7 @@ libmvec {
_ZGVbN2v_cos; _ZGVcN4v_cos; _ZGVdN4v_cos; _ZGVeN8v_cos;
_ZGVbN2v_sin; _ZGVcN4v_sin; _ZGVdN4v_sin; _ZGVeN8v_sin;
_ZGVbN2v_log; _ZGVcN4v_log; _ZGVdN4v_log; _ZGVeN8v_log;
+ _ZGVbN2v_exp; _ZGVcN4v_exp; _ZGVdN4v_exp; _ZGVeN8v_exp;
_ZGVbN4v_cosf; _ZGVcN8v_cosf; _ZGVdN8v_cosf; _ZGVeN16v_cosf;
_ZGVbN4v_sinf; _ZGVcN8v_sinf; _ZGVdN8v_sinf; _ZGVeN16v_sinf;
_ZGVbN4v_logf; _ZGVcN8v_logf; _ZGVdN8v_logf; _ZGVeN16v_logf;
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 1812370..45ebc04 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1535,6 +1535,18 @@ idouble: 1
ildouble: 1
ldouble: 1
+Function: "exp_vlen2":
+double: 1
+
+Function: "exp_vlen4":
+double: 1
+
+Function: "exp_vlen4_avx2":
+double: 1
+
+Function: "exp_vlen8":
+double: 1
+
Function: "expm1":
double: 1
float: 1
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 5fc6ea3..d6355ae 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -62,5 +62,6 @@ libmvec-sysdep_routines += svml_d_cos2_core_sse4 svml_d_cos4_core_avx2 \
svml_s_cosf16_core_avx512 svml_s_sinf4_core_sse4 \
svml_s_sinf8_core_avx2 svml_s_sinf16_core_avx512 \
svml_s_logf4_core_sse4 svml_s_logf8_core_avx2 \
- svml_s_logf16_core_avx512
+ svml_s_logf16_core_avx512 svml_d_exp2_core_sse4 \
+ svml_d_exp4_core_avx2 svml_d_exp8_core_avx512
endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
new file mode 100644
index 0000000..ef3dc49
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
@@ -0,0 +1,38 @@
+/* Multiple versions of vectorized exp.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+ .text
+ENTRY (_ZGVbN2v_exp) /* IFUNC selector for the 2-lane double exp.  */
+ .type _ZGVbN2v_exp, @gnu_indirect_function /* The symbol binds to the address this code returns in %rax.  */
+ cmpl $0, KIND_OFFSET+__cpu_features(%rip) /* Nonzero field at KIND_OFFSET: skip the init call.  */
+ jne 1f
+ call __init_cpu_features
+1: leaq _ZGVbN2v_exp_sse4(%rip), %rax /* Tentatively pick the SSE4 version.  */
+ testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+ jz 2f /* SSE4_1 bit clear: fall back to the SSE2 version.  */
+ ret
+2: leaq _ZGVbN2v_exp_sse2(%rip), %rax /* Assembled by the #include at the end of this file.  */
+ ret
+END (_ZGVbN2v_exp)
+libmvec_hidden_def (_ZGVbN2v_exp)
+
+#define _ZGVbN2v_exp _ZGVbN2v_exp_sse2 /* Rename the entry point used by the included file.  */
+#include "../svml_d_exp2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
new file mode 100644
index 0000000..1f54459
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
@@ -0,0 +1,225 @@
+/* Function exp vectorized with SSE4.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_exp_data.h"
+
+ .text
+ENTRY (_ZGVbN2v_exp_sse4) /* exp of the two doubles in %xmm0, result returned in %xmm0.  */
+/*
+ ALGORITHM DESCRIPTION:
+
+ Argument representation:
+ N = rint(X*2^k/ln2) = 2^k*M+j
+ X = N*ln2/2^k + r = M*ln2 + ln2*(j/2^k) + r
+ then -ln2/2^(k+1) < r < ln2/2^(k+1)
+ Alternatively:
+ N = trunc(X*2^k/ln2)
+ then 0 < r < ln2/2^k
+
+ Result calculation:
+ exp(X) = exp(M*ln2 + ln2*(j/2^k) + r)
+ = 2^M * 2^(j/2^k) * exp(r)
+ 2^M is calculated by bit manipulation
+ 2^(j/2^k) is stored in table
+ exp(r) is approximated by polynomial.
+
+ The table lookup is skipped if k = 0. */
+
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp /* Round the stack pointer down to a 64-byte boundary.  */
+ subq $320, %rsp /* Scratch area; only the fallback path below stores into it.  */
+ movaps %xmm0, %xmm3 /* xmm3 keeps the untouched input X.  */
+ movq __svml_dexp_data@GOTPCREL(%rip), %r8 /* r8 = base of the constants and of the dT table.  */
+
+/* iAbsX = (int)(lX>>32), lX = *(longlong*)&X */
+ pshufd $221, %xmm3, %xmm7 /* Selector 221 puts source dwords 1 and 3 into dwords 0 and 1.  */
+ movups __dbInvLn2(%r8), %xmm0
+
+/* dK = X*dbInvLn2 */
+ mulpd %xmm3, %xmm0
+ movq __iAbsMask(%r8), %xmm5
+ movq __iDomainRange(%r8), %xmm6
+
+/* iAbsX = iAbsX&iAbsMask */
+ pand %xmm5, %xmm7
+
+/* iRangeMask = (iAbsX>iDomainRange) */
+ pcmpgtd %xmm6, %xmm7
+
+/* Mask = iRangeMask?1:0, set mask for overflow/underflow */
+ movmskps %xmm7, %eax /* One bit per dword lane; bits 0 and 1 describe the two inputs.  */
+
+/* dN = rint(X*2^k/Ln2) */
+ xorps %xmm7, %xmm7
+ movups __dbLn2hi(%r8), %xmm5
+ movups __dbLn2lo(%r8), %xmm6
+ roundpd $0, %xmm0, %xmm7
+
+/* dR = X - dN*dbLn2hi, dbLn2hi is 52-8-k hi bits of ln2/2^k */
+ mulpd %xmm7, %xmm5
+
+/* dR = dR - dN*dbLn2lo, dbLn2lo is 40..94 bits of lo part of ln2/2^k */
+ mulpd %xmm6, %xmm7
+ movups __dbShifter(%r8), %xmm4
+
+/* dM = X*dbInvLn2+dbShifter */
+ addpd %xmm0, %xmm4
+ movaps %xmm3, %xmm0
+ subpd %xmm5, %xmm0
+ subpd %xmm7, %xmm0 /* xmm0 = dR, the reduced argument.  */
+ movups __dPC2(%r8), %xmm5
+
+/* exp(r) = b0+r*(b0+r*(b1+r*b2)) */
+ mulpd %xmm0, %xmm5
+ addpd __dPC1(%r8), %xmm5
+ mulpd %xmm0, %xmm5
+ movups __dPC0(%r8), %xmm6
+ addpd %xmm6, %xmm5
+ mulpd %xmm5, %xmm0
+ movdqu __lIndexMask(%r8), %xmm2
+
+/* lIndex = (*(longlong*)&dM)&lIndexMask, lIndex is the lower K bits of lM */
+ movdqa %xmm2, %xmm1
+
+/* lM = (*(longlong*)&dM)&(~lIndexMask) */
+ pandn %xmm4, %xmm2
+ pand %xmm4, %xmm1
+
+/* lM = lM<<(52-K), 2^M */
+ psllq $42, %xmm2
+
+/* table lookup for dT[j] = 2^(j/2^k) */
+ movd %xmm1, %edx /* Table index for element 0.  */
+ pextrw $4, %xmm1, %ecx /* Table index for element 1 (word 4 = low word of the upper qword).  */
+ addpd %xmm0, %xmm6 /* Completes the polynomial: dPC0 + r*(...).  */
+ shll $3, %edx /* Scale indices by 8, the size of a double.  */
+ shll $3, %ecx
+ movq (%r8,%rdx), %xmm0
+ andl $3, %eax /* Keep the two lane bits; the jne below consumes these flags.  */
+ movhpd (%r8,%rcx), %xmm0
+
+/* 2^(j/2^k) * exp(r) */
+ mulpd %xmm6, %xmm0
+
+/* multiply by 2^M through integer add */
+ paddq %xmm2, %xmm0
+ jne .LBL_1_3 /* Some element is outside the fast-path domain.  */
+
+.LBL_1_2: /* Common exit: result is in %xmm0.  */
+ cfi_remember_state
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+
+.LBL_1_3: /* Fallback: recompute flagged elements with scalar exp.  */
+ cfi_restore_state
+ movups %xmm3, 192(%rsp) /* Spill the input vector.  */
+ movups %xmm0, 256(%rsp) /* Spill the fast-path result; flagged elements get overwritten.  */
+ je .LBL_1_2 /* Flags are still those of the andl; reached via jne, so not taken.  */
+
+ xorb %cl, %cl
+ xorl %edx, %edx
+ movups %xmm8, 112(%rsp) /* Save xmm8-xmm15, rsi, rdi and r12-r15 across the scalar calls.  */
+ movups %xmm9, 96(%rsp)
+ movups %xmm10, 80(%rsp)
+ movups %xmm11, 64(%rsp)
+ movups %xmm12, 48(%rsp)
+ movups %xmm13, 32(%rsp)
+ movups %xmm14, 16(%rsp)
+ movups %xmm15, (%rsp)
+ movq %rsi, 136(%rsp)
+ movq %rdi, 128(%rsp)
+ movq %r12, 168(%rsp)
+ cfi_offset_rel_rsp (12, 168)
+ movb %cl, %r12b /* r12b = pair counter, starts at 0.  */
+ movq %r13, 160(%rsp)
+ cfi_offset_rel_rsp (13, 160)
+ movl %eax, %r13d /* r13d = mask of flagged elements.  */
+ movq %r14, 152(%rsp)
+ cfi_offset_rel_rsp (14, 152)
+ movl %edx, %r14d /* r14d = mask bit index of the even element, starts at 0.  */
+ movq %r15, 144(%rsp)
+ cfi_offset_rel_rsp (15, 144)
+ cfi_remember_state
+
+.LBL_1_6: /* Loop head: test the even element of the current pair.  */
+ btl %r14d, %r13d
+ jc .LBL_1_12
+
+.LBL_1_7: /* Test the odd element of the current pair.  */
+ lea 1(%r14), %esi
+ btl %esi, %r13d
+ jc .LBL_1_10
+
+.LBL_1_8: /* Advance to the next pair.  */
+ incb %r12b
+ addl $2, %r14d
+ cmpb $16, %r12b /* 16 iterations walk mask bit indices 0..31.  */
+ jb .LBL_1_6
+
+ movups 112(%rsp), %xmm8 /* Loop done: restore everything saved above.  */
+ movups 96(%rsp), %xmm9
+ movups 80(%rsp), %xmm10
+ movups 64(%rsp), %xmm11
+ movups 48(%rsp), %xmm12
+ movups 32(%rsp), %xmm13
+ movups 16(%rsp), %xmm14
+ movups (%rsp), %xmm15
+ movq 136(%rsp), %rsi
+ movq 128(%rsp), %rdi
+ movq 168(%rsp), %r12
+ cfi_restore (%r12)
+ movq 160(%rsp), %r13
+ cfi_restore (%r13)
+ movq 152(%rsp), %r14
+ cfi_restore (%r14)
+ movq 144(%rsp), %r15
+ cfi_restore (%r15)
+ movups 256(%rsp), %xmm0 /* Reload the patched result vector.  */
+ jmp .LBL_1_2
+
+.LBL_1_10: /* Odd element flagged.  */
+ cfi_restore_state
+ movzbl %r12b, %r15d
+ shlq $4, %r15 /* r15 = 16 * pair index.  */
+ movsd 200(%rsp,%r15), %xmm0 /* Odd input of the pair (192 + 8).  */
+
+ call exp@PLT
+
+ movsd %xmm0, 264(%rsp,%r15) /* Matching slot of the result (256 + 8).  */
+ jmp .LBL_1_8
+
+.LBL_1_12: /* Even element flagged.  */
+ movzbl %r12b, %r15d
+ shlq $4, %r15 /* r15 = 16 * pair index.  */
+ movsd 192(%rsp,%r15), %xmm0 /* Even input of the pair.  */
+
+ call exp@PLT
+
+ movsd %xmm0, 256(%rsp,%r15) /* Matching slot of the result.  */
+ jmp .LBL_1_7
+
+END (_ZGVbN2v_exp_sse4)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
new file mode 100644
index 0000000..7f2ebde
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
@@ -0,0 +1,38 @@
+/* Multiple versions of vectorized exp.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+ .text
+ENTRY (_ZGVdN4v_exp) /* IFUNC selector for the 4-lane double exp.  */
+ .type _ZGVdN4v_exp, @gnu_indirect_function /* The symbol binds to the address this code returns in %rax.  */
+ cmpl $0, KIND_OFFSET+__cpu_features(%rip) /* Nonzero field at KIND_OFFSET: skip the init call.  */
+ jne 1f
+ call __init_cpu_features
+1: leaq _ZGVdN4v_exp_avx2(%rip), %rax /* Tentatively pick the AVX2 version.  */
+ testl $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+ jz 2f /* AVX2_Usable bit clear: fall back to the wrapper.  */
+ ret
+2: leaq _ZGVdN4v_exp_sse_wrapper(%rip), %rax /* Assembled by the #include at the end of this file.  */
+ ret
+END (_ZGVdN4v_exp)
+libmvec_hidden_def (_ZGVdN4v_exp)
+
+#define _ZGVdN4v_exp _ZGVdN4v_exp_sse_wrapper /* Rename the entry point used by the included file.  */
+#include "../svml_d_exp4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
new file mode 100644
index 0000000..a34e267
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
@@ -0,0 +1,212 @@
+/* Function exp vectorized with AVX2.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_exp_data.h"
+
+ .text
+ENTRY (_ZGVdN4v_exp_avx2) /* exp of the four doubles in %ymm0, result returned in %ymm0.  */
+/*
+ ALGORITHM DESCRIPTION:
+
+ Argument representation:
+ N = rint(X*2^k/ln2) = 2^k*M+j
+ X = N*ln2/2^k + r = M*ln2 + ln2*(j/2^k) + r
+ then -ln2/2^(k+1) < r < ln2/2^(k+1)
+ Alternatively:
+ N = trunc(X*2^k/ln2)
+ then 0 < r < ln2/2^k
+
+ Result calculation:
+ exp(X) = exp(M*ln2 + ln2*(j/2^k) + r)
+ = 2^M * 2^(j/2^k) * exp(r)
+ 2^M is calculated by bit manipulation
+ 2^(j/2^k) is stored in table
+ exp(r) is approximated by polynomial
+
+ The table lookup is skipped if k = 0. */
+
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp /* Round the stack pointer down to a 64-byte boundary.  */
+ subq $448, %rsp /* Scratch area; only the fallback path below stores into it.  */
+ movq __svml_dexp_data@GOTPCREL(%rip), %rax /* rax = base of the constants and of the dT table.  */
+ vmovdqa %ymm0, %ymm2 /* ymm2 keeps the untouched input X.  */
+ vmovupd __dbInvLn2(%rax), %ymm3
+ vmovupd __dbShifter(%rax), %ymm1
+ vmovupd __lIndexMask(%rax), %ymm4
+
+/* dM = X*dbInvLn2+dbShifter, dbInvLn2 = 2^k/Ln2 */
+ vfmadd213pd %ymm1, %ymm2, %ymm3
+
+/* iAbsX = (int)(lX>>32), lX = *(longlong*)&X */
+ vextracti128 $1, %ymm2, %xmm5
+ vshufps $221, %xmm5, %xmm2, %xmm6 /* Selector 221 collects dwords 1 and 3 of each 128-bit half of X.  */
+
+/* iAbsX = iAbsX&iAbsMask */
+ vandps __iAbsMask(%rax), %xmm6, %xmm7
+
+/* dN = dM-dbShifter, dN = rint(X*2^k/Ln2) */
+ vsubpd %ymm1, %ymm3, %ymm6
+
+/* iRangeMask = (iAbsX>iDomainRange) */
+ vpcmpgtd __iDomainRange(%rax), %xmm7, %xmm0
+ vmovupd __dbLn2hi(%rax), %ymm1
+ vmovupd __dPC0(%rax), %ymm7
+
+/* Mask = iRangeMask?1:0, set mask for overflow/underflow */
+ vmovmskps %xmm0, %ecx /* ecx = one bit per input element (4 bits).  */
+ vmovupd __dPC2(%rax), %ymm0
+
+/* dR = X - dN*dbLn2hi, dbLn2hi is 52-8-k hi bits of ln2/2^k */
+ vmovdqa %ymm2, %ymm5
+ vfnmadd231pd %ymm6, %ymm1, %ymm5
+
+/* dR = dR - dN*dbLn2lo, dbLn2lo is 40..94 bits of lo part of ln2/2^k */
+ vfnmadd132pd __dbLn2lo(%rax), %ymm5, %ymm6 /* ymm6 = dR, the reduced argument.  */
+
+/* exp(r) = b0+r*(b0+r*(b1+r*b2)) */
+ vfmadd213pd __dPC1(%rax), %ymm6, %ymm0
+ vfmadd213pd %ymm7, %ymm6, %ymm0
+ vfmadd213pd %ymm7, %ymm6, %ymm0 /* dPC0 (ymm7) is used for both outer terms, as in the formula.  */
+
+/* lIndex = (*(longlong*)&dM)&lIndexMask, lIndex is the lower K bits of lM */
+ vandps %ymm4, %ymm3, %ymm1
+
+/* table lookup for dT[j] = 2^(j/2^k) */
+ vxorpd %ymm6, %ymm6, %ymm6
+ vpcmpeqd %ymm5, %ymm5, %ymm5 /* All-ones mask: gather every element.  */
+ vgatherqpd %ymm5, (%rax,%ymm1,8), %ymm6
+
+/* lM = (*(longlong*)&dM)&(~lIndexMask) */
+ vpandn %ymm3, %ymm4, %ymm3
+
+/* 2^(j/2^k) * exp(r) */
+ vmulpd %ymm0, %ymm6, %ymm0
+
+/* lM = lM<<(52-K), 2^M */
+ vpsllq $42, %ymm3, %ymm4
+
+/* multiply by 2^M through integer add */
+ vpaddq %ymm4, %ymm0, %ymm0
+ testl %ecx, %ecx
+ jne .LBL_1_3 /* Some element is outside the fast-path domain.  */
+
+.LBL_1_2: /* Common exit: result is in %ymm0.  */
+ cfi_remember_state
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+
+.LBL_1_3: /* Fallback: recompute flagged elements with scalar exp.  */
+ cfi_restore_state
+ vmovupd %ymm2, 320(%rsp) /* Spill the input vector.  */
+ vmovupd %ymm0, 384(%rsp) /* Spill the fast-path result; flagged elements get overwritten.  */
+ je .LBL_1_2 /* Flags are still those of the testl; reached via jne, so not taken.  */
+
+ xorb %dl, %dl
+ xorl %eax, %eax
+ vmovups %ymm8, 224(%rsp) /* Save ymm8-ymm15, rsi, rdi and r12-r15 across the scalar calls.  */
+ vmovups %ymm9, 192(%rsp)
+ vmovups %ymm10, 160(%rsp)
+ vmovups %ymm11, 128(%rsp)
+ vmovups %ymm12, 96(%rsp)
+ vmovups %ymm13, 64(%rsp)
+ vmovups %ymm14, 32(%rsp)
+ vmovups %ymm15, (%rsp)
+ movq %rsi, 264(%rsp)
+ movq %rdi, 256(%rsp)
+ movq %r12, 296(%rsp)
+ cfi_offset_rel_rsp (12, 296)
+ movb %dl, %r12b /* r12b = pair counter, starts at 0.  */
+ movq %r13, 288(%rsp)
+ cfi_offset_rel_rsp (13, 288)
+ movl %ecx, %r13d /* r13d = mask of flagged elements.  */
+ movq %r14, 280(%rsp)
+ cfi_offset_rel_rsp (14, 280)
+ movl %eax, %r14d /* r14d = mask bit index of the even element, starts at 0.  */
+ movq %r15, 272(%rsp)
+ cfi_offset_rel_rsp (15, 272)
+ cfi_remember_state
+
+.LBL_1_6: /* Loop head: test the even element of the current pair.  */
+ btl %r14d, %r13d
+ jc .LBL_1_12
+
+.LBL_1_7: /* Test the odd element of the current pair.  */
+ lea 1(%r14), %esi
+ btl %esi, %r13d
+ jc .LBL_1_10
+
+.LBL_1_8: /* Advance to the next pair.  */
+ incb %r12b
+ addl $2, %r14d
+ cmpb $16, %r12b /* 16 iterations walk mask bit indices 0..31.  */
+ jb .LBL_1_6
+
+ vmovups 224(%rsp), %ymm8 /* Loop done: restore everything saved above.  */
+ vmovups 192(%rsp), %ymm9
+ vmovups 160(%rsp), %ymm10
+ vmovups 128(%rsp), %ymm11
+ vmovups 96(%rsp), %ymm12
+ vmovups 64(%rsp), %ymm13
+ vmovups 32(%rsp), %ymm14
+ vmovups (%rsp), %ymm15
+ vmovupd 384(%rsp), %ymm0 /* Reload the patched result vector.  */
+ movq 264(%rsp), %rsi
+ movq 256(%rsp), %rdi
+ movq 296(%rsp), %r12
+ cfi_restore (%r12)
+ movq 288(%rsp), %r13
+ cfi_restore (%r13)
+ movq 280(%rsp), %r14
+ cfi_restore (%r14)
+ movq 272(%rsp), %r15
+ cfi_restore (%r15)
+ jmp .LBL_1_2
+
+.LBL_1_10: /* Odd element flagged.  */
+ cfi_restore_state
+ movzbl %r12b, %r15d
+ shlq $4, %r15 /* r15 = 16 * pair index.  */
+ vmovsd 328(%rsp,%r15), %xmm0 /* Odd input of the pair (320 + 8).  */
+ vzeroupper /* Zero the upper ymm halves before calling the scalar routine.  */
+
+ call exp@PLT
+
+ vmovsd %xmm0, 392(%rsp,%r15) /* Matching slot of the result (384 + 8).  */
+ jmp .LBL_1_8
+
+.LBL_1_12: /* Even element flagged.  */
+ movzbl %r12b, %r15d
+ shlq $4, %r15 /* r15 = 16 * pair index.  */
+ vmovsd 320(%rsp,%r15), %xmm0 /* Even input of the pair.  */
+ vzeroupper /* Zero the upper ymm halves before calling the scalar routine.  */
+
+ call exp@PLT
+
+ vmovsd %xmm0, 384(%rsp,%r15) /* Matching slot of the result.  */
+ jmp .LBL_1_7
+
+END (_ZGVdN4v_exp_avx2)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
new file mode 100644
index 0000000..8f837fb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
@@ -0,0 +1,39 @@
+/* Multiple versions of vectorized exp.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+ .text
+ENTRY (_ZGVeN8v_exp) /* IFUNC selector for the 8-lane double exp.  */
+ .type _ZGVeN8v_exp, @gnu_indirect_function /* The symbol binds to the address this code returns in %rax.  */
+ cmpl $0, KIND_OFFSET+__cpu_features(%rip) /* Nonzero field at KIND_OFFSET: skip the init call.  */
+ jne 1f /* Needs the "f" suffix: a bare 1 is the absolute address 1, not the label.  */
+ call __init_cpu_features
+1: leaq _ZGVeN8v_exp_skx(%rip), %rax /* First choice: SKX version, needs AVX512DQ.  */
+ testl $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+ jnz 3f /* Forward reference to local label 3 (the ret).  */
+2: leaq _ZGVeN8v_exp_knl(%rip), %rax /* Second choice: KNL version, needs AVX512F.  */
+ testl $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+ jnz 3f /* Forward reference to local label 3 (the ret).  */
+ leaq _ZGVeN8v_exp_avx2_wrapper(%rip), %rax /* Last resort: wrapper assembled by the #include below.  */
+3: ret
+END (_ZGVeN8v_exp)
+
+#define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper /* Rename the entry point used by the included file.  */
+#include "../svml_d_exp8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
new file mode 100644
index 0000000..049a7e4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
@@ -0,0 +1,456 @@
+/* Function exp vectorized with AVX-512. KNL and SKX versions.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_exp_data.h"
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVeN8v_exp_knl) /* exp of the eight doubles in %zmm0, result returned in %zmm0.  */
+#ifndef HAVE_AVX512_ASM_SUPPORT /* Without AVX-512 assembler support the body is the wrapper macro instead.  */
+WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
+#else
+/*
+ ALGORITHM DESCRIPTION:
+
+ Argument representation:
+ N = rint(X*2^k/ln2) = 2^k*M+j
+ X = N*ln2/2^k + r = M*ln2 + ln2*(j/2^k) + r
+ then -ln2/2^(k+1) < r < ln2/2^(k+1)
+ Alternatively:
+ N = trunc(X*2^k/ln2)
+ then 0 < r < ln2/2^k
+
+ Result calculation:
+ exp(X) = exp(M*ln2 + ln2*(j/2^k) + r)
+ = 2^M * 2^(j/2^k) * exp(r)
+ 2^M is calculated by bit manipulation
+ 2^(j/2^k) is stored in table
+ exp(r) is approximated by polynomial
+
+ The table lookup is skipped if k = 0. */
+
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp /* Round the stack pointer down to a 64-byte boundary.  */
+ subq $1280, %rsp /* Scratch area; only the fallback path below stores into it.  */
+ movq __svml_dexp_data@GOTPCREL(%rip), %rax /* rax = base of the constants and of the dT table.  */
+
+/* dR = X - dN*dbLn2hi, dbLn2hi is 52-8-k hi bits of ln2/2^k */
+ vmovaps %zmm0, %zmm8
+
+/* iAbsX = (int)(lX>>32), lX = *(longlong*)&X */
+ vpsrlq $32, %zmm0, %zmm1
+
+/* iAbsX = iAbsX&iAbsMask */
+ movl $255, %edx
+ vpmovqd %zmm1, %ymm2
+ kmovw %edx, %k2 /* k2 = 0xff: operate on the low eight dword lanes only.  */
+
+/* iRangeMask = (iAbsX>iDomainRange) */
+ movl $-1, %ecx
+
+/* table lookup for dT[j] = 2^(j/2^k) */
+ vpxord %zmm11, %zmm11, %zmm11
+ vmovups __dbInvLn2(%rax), %zmm5
+ vmovups __dbLn2hi(%rax), %zmm7
+ kxnorw %k3, %k3, %k3 /* All-ones mask for the gather below.  */
+
+/* dM = X*dbInvLn2+dbShifter, dbInvLn2 = 2^k/Ln2 */
+ vfmadd213pd __dbShifter(%rax), %zmm0, %zmm5
+ vmovups __dPC2(%rax), %zmm12
+
+/* dN = dM-dbShifter, dN = rint(X*2^k/Ln2) */
+ vsubpd __dbShifter(%rax), %zmm5, %zmm9
+ vmovups __lIndexMask(%rax), %zmm4
+ vfnmadd231pd %zmm9, %zmm7, %zmm8
+ vpandd __iAbsMask(%rax), %zmm2, %zmm2{%k2}
+
+/* lIndex = (*(longlong*)&dM)&lIndexMask, lIndex is the lower K bits of lM */
+ vpandq %zmm4, %zmm5, %zmm10
+ vgatherqpd (%rax,%zmm10,8), %zmm11{%k3}
+ vpcmpgtd __iDomainRange(%rax), %zmm2, %k1{%k2}
+
+/* lM = (*(longlong*)&dM)&(~lIndexMask) */
+ vpandnq %zmm5, %zmm4, %zmm6
+ vpbroadcastd %ecx, %zmm3{%k1}{z} /* -1 in flagged lanes, 0 elsewhere.  */
+
+/* lM = lM<<(52-K), 2^M */
+ vpsllq $42, %zmm6, %zmm14
+
+/* dR = dR - dN*dbLn2lo, dbLn2lo is 40..94 bits of lo part of ln2/2^k */
+ vfnmadd132pd __dbLn2lo(%rax), %zmm8, %zmm9 /* zmm9 = dR, the reduced argument.  */
+
+/* Mask = iRangeMask?1:0, set mask for overflow/underflow */
+ vptestmd %zmm3, %zmm3, %k0{%k2}
+
+/* exp(r) = b0+r*(b0+r*(b1+r*b2)) */
+ vfmadd213pd __dPC1(%rax), %zmm9, %zmm12
+ kmovw %k0, %ecx
+ movzbl %cl, %ecx /* ecx = one bit per input element (8 bits).  */
+ vfmadd213pd __dPC0(%rax), %zmm9, %zmm12
+ vfmadd213pd __dPC0(%rax), %zmm9, %zmm12 /* dPC0 is used for both outer terms, as in the formula.  */
+
+/* 2^(j/2^k) * exp(r) */
+ vmulpd %zmm12, %zmm11, %zmm13
+
+/* multiply by 2^M through integer add */
+ vpaddq %zmm14, %zmm13, %zmm1
+ testl %ecx, %ecx
+ jne .LBL_1_3 /* Some element is outside the fast-path domain.  */
+
+.LBL_1_2: /* Common exit: move the result from %zmm1 to %zmm0.  */
+ cfi_remember_state
+ vmovaps %zmm1, %zmm0
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+
+.LBL_1_3: /* Fallback: recompute flagged elements with scalar exp.  */
+ cfi_restore_state
+ vmovups %zmm0, 1152(%rsp) /* Spill the input vector.  */
+ vmovups %zmm1, 1216(%rsp) /* Spill the fast-path result; flagged elements get overwritten.  */
+ je .LBL_1_2 /* Flags are still those of the testl; reached via jne, so not taken.  */
+
+ xorb %dl, %dl
+ kmovw %k4, 1048(%rsp) /* Save k4-k7, zmm16-zmm31, rsi, rdi and r12-r15 across the scalar calls.  */
+ xorl %eax, %eax
+ kmovw %k5, 1040(%rsp)
+ kmovw %k6, 1032(%rsp)
+ kmovw %k7, 1024(%rsp)
+ vmovups %zmm16, 960(%rsp)
+ vmovups %zmm17, 896(%rsp)
+ vmovups %zmm18, 832(%rsp)
+ vmovups %zmm19, 768(%rsp)
+ vmovups %zmm20, 704(%rsp)
+ vmovups %zmm21, 640(%rsp)
+ vmovups %zmm22, 576(%rsp)
+ vmovups %zmm23, 512(%rsp)
+ vmovups %zmm24, 448(%rsp)
+ vmovups %zmm25, 384(%rsp)
+ vmovups %zmm26, 320(%rsp)
+ vmovups %zmm27, 256(%rsp)
+ vmovups %zmm28, 192(%rsp)
+ vmovups %zmm29, 128(%rsp)
+ vmovups %zmm30, 64(%rsp)
+ vmovups %zmm31, (%rsp)
+ movq %rsi, 1064(%rsp)
+ movq %rdi, 1056(%rsp)
+ movq %r12, 1096(%rsp)
+ cfi_offset_rel_rsp (12, 1096)
+ movb %dl, %r12b /* r12b = pair counter, starts at 0.  */
+ movq %r13, 1088(%rsp)
+ cfi_offset_rel_rsp (13, 1088)
+ movl %ecx, %r13d /* r13d = mask of flagged elements.  */
+ movq %r14, 1080(%rsp)
+ cfi_offset_rel_rsp (14, 1080)
+ movl %eax, %r14d /* r14d = mask bit index of the even element, starts at 0.  */
+ movq %r15, 1072(%rsp)
+ cfi_offset_rel_rsp (15, 1072)
+ cfi_remember_state
+
+.LBL_1_6: /* Loop head: test the even element of the current pair.  */
+ btl %r14d, %r13d
+ jc .LBL_1_12
+
+.LBL_1_7: /* Test the odd element of the current pair.  */
+ lea 1(%r14), %esi
+ btl %esi, %r13d
+ jc .LBL_1_10
+
+.LBL_1_8: /* Advance to the next pair.  */
+ addb $1, %r12b
+ addl $2, %r14d
+ cmpb $16, %r12b /* 16 iterations walk mask bit indices 0..31.  */
+ jb .LBL_1_6
+
+ kmovw 1048(%rsp), %k4 /* Loop done: restore everything saved above.  */
+ movq 1064(%rsp), %rsi
+ kmovw 1040(%rsp), %k5
+ movq 1056(%rsp), %rdi
+ kmovw 1032(%rsp), %k6
+ movq 1096(%rsp), %r12
+ cfi_restore (%r12)
+ movq 1088(%rsp), %r13
+ cfi_restore (%r13)
+ kmovw 1024(%rsp), %k7
+ vmovups 960(%rsp), %zmm16
+ vmovups 896(%rsp), %zmm17
+ vmovups 832(%rsp), %zmm18
+ vmovups 768(%rsp), %zmm19
+ vmovups 704(%rsp), %zmm20
+ vmovups 640(%rsp), %zmm21
+ vmovups 576(%rsp), %zmm22
+ vmovups 512(%rsp), %zmm23
+ vmovups 448(%rsp), %zmm24
+ vmovups 384(%rsp), %zmm25
+ vmovups 320(%rsp), %zmm26
+ vmovups 256(%rsp), %zmm27
+ vmovups 192(%rsp), %zmm28
+ vmovups 128(%rsp), %zmm29
+ vmovups 64(%rsp), %zmm30
+ vmovups (%rsp), %zmm31
+ movq 1080(%rsp), %r14
+ cfi_restore (%r14)
+ movq 1072(%rsp), %r15
+ cfi_restore (%r15)
+ vmovups 1216(%rsp), %zmm1 /* Reload the patched result vector.  */
+ jmp .LBL_1_2
+
+.LBL_1_10: /* Odd element flagged.  */
+ cfi_restore_state
+ movzbl %r12b, %r15d
+ shlq $4, %r15 /* r15 = 16 * pair index.  */
+ vmovsd 1160(%rsp,%r15), %xmm0 /* Odd input of the pair (1152 + 8).  */
+ call exp@PLT
+ vmovsd %xmm0, 1224(%rsp,%r15) /* Matching slot of the result (1216 + 8).  */
+ jmp .LBL_1_8
+
+.LBL_1_12: /* Even element flagged.  */
+ movzbl %r12b, %r15d
+ shlq $4, %r15 /* r15 = 16 * pair index.  */
+ vmovsd 1152(%rsp,%r15), %xmm0 /* Even input of the pair.  */
+ call exp@PLT
+ vmovsd %xmm0, 1216(%rsp,%r15) /* Matching slot of the result.  */
+ jmp .LBL_1_7
+#endif
+END (_ZGVeN8v_exp_knl)
+
+ENTRY (_ZGVeN8v_exp_skx) /* exp of the eight doubles in %zmm0, result returned in %zmm0.  */
+#ifndef HAVE_AVX512_ASM_SUPPORT /* Without AVX-512 assembler support the body is the wrapper macro instead.  */
+WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
+#else
+/*
+ ALGORITHM DESCRIPTION:
+
+ Argument representation:
+ N = rint(X*2^k/ln2) = 2^k*M+j
+ X = N*ln2/2^k + r = M*ln2 + ln2*(j/2^k) + r
+ then -ln2/2^(k+1) < r < ln2/2^(k+1)
+ Alternatively:
+ N = trunc(X*2^k/ln2)
+ then 0 < r < ln2/2^k
+
+ Result calculation:
+ exp(X) = exp(M*ln2 + ln2*(j/2^k) + r)
+ = 2^M * 2^(j/2^k) * exp(r)
+ 2^M is calculated by bit manipulation
+ 2^(j/2^k) is stored in table
+ exp(r) is approximated by polynomial
+
+ The table lookup is skipped if k = 0. */
+
+ pushq %rbp
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (%rbp, 0)
+ movq %rsp, %rbp
+ cfi_def_cfa_register (%rbp)
+ andq $-64, %rsp /* Round the stack pointer down to a 64-byte boundary.  */
+ subq $1280, %rsp /* Scratch area; only the fallback path below stores into it.  */
+ movq __svml_dexp_data@GOTPCREL(%rip), %rax /* rax = base of the constants and of the dT table.  */
+
+/* table lookup for dT[j] = 2^(j/2^k) */
+ kxnorw %k1, %k1, %k1 /* All-ones mask for the gather below.  */
+
+/* iAbsX = (int)(lX>>32), lX = *(longlong*)&X */
+ vpsrlq $32, %zmm0, %zmm1
+ vmovups __dbInvLn2(%rax), %zmm7
+ vmovups __dbShifter(%rax), %zmm5
+ vmovups __lIndexMask(%rax), %zmm6
+ vmovups __dbLn2hi(%rax), %zmm9
+ vmovups __dPC0(%rax), %zmm12
+
+/* dM = X*dbInvLn2+dbShifter, dbInvLn2 = 2^k/Ln2 */
+ vfmadd213pd %zmm5, %zmm0, %zmm7
+ vpmovqd %zmm1, %ymm2
+
+/* dN = dM-dbShifter, dN = rint(X*2^k/Ln2) */
+ vsubpd %zmm5, %zmm7, %zmm11
+
+/* iAbsX = iAbsX&iAbsMask */
+ vpand __iAbsMask(%rax), %ymm2, %ymm3
+
+/* dR = X - dN*dbLn2hi, dbLn2hi is 52-8-k hi bits of ln2/2^k */
+ vmovaps %zmm0, %zmm10
+ vfnmadd231pd %zmm11, %zmm9, %zmm10
+ vmovups __dPC2(%rax), %zmm9
+
+/* dR = dR - dN*dbLn2lo, dbLn2lo is 40..94 bits of lo part of ln2/2^k */
+ vfnmadd132pd __dbLn2lo(%rax), %zmm10, %zmm11 /* zmm11 = dR, the reduced argument.  */
+
+/* exp(r) = b0+r*(b0+r*(b1+r*b2)) */
+ vfmadd213pd __dPC1(%rax), %zmm11, %zmm9
+ vfmadd213pd %zmm12, %zmm11, %zmm9
+ vfmadd213pd %zmm12, %zmm11, %zmm9 /* dPC0 (zmm12) is used for both outer terms, as in the formula.  */
+
+/* iRangeMask = (iAbsX>iDomainRange) */
+ vpcmpgtd __iDomainRange(%rax), %ymm3, %ymm4
+
+/* Mask = iRangeMask?1:0, set mask for overflow/underflow */
+ vmovmskps %ymm4, %ecx /* ecx = one bit per input element (8 bits).  */
+
+/* lIndex = (*(longlong*)&dM)&lIndexMask, lIndex is the lower K bits of lM */
+ vpandq %zmm6, %zmm7, %zmm13
+ vpmovqd %zmm13, %ymm14 /* Narrow the indices to dwords for the dword-indexed gather.  */
+ vpxord %zmm15, %zmm15, %zmm15
+ vgatherdpd (%rax,%ymm14,8), %zmm15{%k1}
+
+/* 2^(j/2^k) * exp(r) */
+ vmulpd %zmm9, %zmm15, %zmm10
+
+/* lM = (*(longlong*)&dM)&(~lIndexMask) */
+ vpandnq %zmm7, %zmm6, %zmm8
+
+/* lM = lM<<(52-K), 2^M */
+ vpsllq $42, %zmm8, %zmm1
+
+/* multiply by 2^M through integer add */
+ vpaddq %zmm1, %zmm10, %zmm1
+ testl %ecx, %ecx
+ jne .LBL_2_3 /* Some element is outside the fast-path domain.  */
+
+.LBL_2_2: /* Common exit: move the result from %zmm1 to %zmm0.  */
+ cfi_remember_state
+ vmovaps %zmm1, %zmm0
+ movq %rbp, %rsp
+ cfi_def_cfa_register (%rsp)
+ popq %rbp
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (%rbp)
+ ret
+
+.LBL_2_3: /* Fallback: recompute flagged elements with scalar exp.  */
+ cfi_restore_state
+ vmovups %zmm0, 1152(%rsp) /* Spill the input vector.  */
+ vmovups %zmm1, 1216(%rsp) /* Spill the fast-path result; flagged elements get overwritten.  */
+ je .LBL_2_2 /* Flags are still those of the testl; reached via jne, so not taken.  */
+
+ xorb %dl, %dl
+ xorl %eax, %eax
+ kmovw %k4, 1048(%rsp) /* Save k4-k7, zmm16-zmm31, rsi, rdi and r12-r15 across the scalar calls.  */
+ kmovw %k5, 1040(%rsp)
+ kmovw %k6, 1032(%rsp)
+ kmovw %k7, 1024(%rsp)
+ vmovups %zmm16, 960(%rsp)
+ vmovups %zmm17, 896(%rsp)
+ vmovups %zmm18, 832(%rsp)
+ vmovups %zmm19, 768(%rsp)
+ vmovups %zmm20, 704(%rsp)
+ vmovups %zmm21, 640(%rsp)
+ vmovups %zmm22, 576(%rsp)
+ vmovups %zmm23, 512(%rsp)
+ vmovups %zmm24, 448(%rsp)
+ vmovups %zmm25, 384(%rsp)
+ vmovups %zmm26, 320(%rsp)
+ vmovups %zmm27, 256(%rsp)
+ vmovups %zmm28, 192(%rsp)
+ vmovups %zmm29, 128(%rsp)
+ vmovups %zmm30, 64(%rsp)
+ vmovups %zmm31, (%rsp)
+ movq %rsi, 1064(%rsp)
+ movq %rdi, 1056(%rsp)
+ movq %r12, 1096(%rsp)
+ cfi_offset_rel_rsp (12, 1096)
+ movb %dl, %r12b /* r12b = pair counter, starts at 0.  */
+ movq %r13, 1088(%rsp)
+ cfi_offset_rel_rsp (13, 1088)
+ movl %ecx, %r13d /* r13d = mask of flagged elements.  */
+ movq %r14, 1080(%rsp)
+ cfi_offset_rel_rsp (14, 1080)
+ movl %eax, %r14d /* r14d = mask bit index of the even element, starts at 0.  */
+ movq %r15, 1072(%rsp)
+ cfi_offset_rel_rsp (15, 1072)
+ cfi_remember_state
+
+.LBL_2_6: /* Loop head: test the even element of the current pair.  */
+ btl %r14d, %r13d
+ jc .LBL_2_12
+
+.LBL_2_7: /* Test the odd element of the current pair.  */
+ lea 1(%r14), %esi
+ btl %esi, %r13d
+ jc .LBL_2_10
+
+.LBL_2_8: /* Advance to the next pair.  */
+ incb %r12b
+ addl $2, %r14d
+ cmpb $16, %r12b /* 16 iterations walk mask bit indices 0..31.  */
+ jb .LBL_2_6
+
+ kmovw 1048(%rsp), %k4 /* Loop done: restore everything saved above.  */
+ kmovw 1040(%rsp), %k5
+ kmovw 1032(%rsp), %k6
+ kmovw 1024(%rsp), %k7
+ vmovups 960(%rsp), %zmm16
+ vmovups 896(%rsp), %zmm17
+ vmovups 832(%rsp), %zmm18
+ vmovups 768(%rsp), %zmm19
+ vmovups 704(%rsp), %zmm20
+ vmovups 640(%rsp), %zmm21
+ vmovups 576(%rsp), %zmm22
+ vmovups 512(%rsp), %zmm23
+ vmovups 448(%rsp), %zmm24
+ vmovups 384(%rsp), %zmm25
+ vmovups 320(%rsp), %zmm26
+ vmovups 256(%rsp), %zmm27
+ vmovups 192(%rsp), %zmm28
+ vmovups 128(%rsp), %zmm29
+ vmovups 64(%rsp), %zmm30
+ vmovups (%rsp), %zmm31
+ vmovups 1216(%rsp), %zmm1 /* Reload the patched result vector.  */
+ movq 1064(%rsp), %rsi
+ movq 1056(%rsp), %rdi
+ movq 1096(%rsp), %r12
+ cfi_restore (%r12)
+ movq 1088(%rsp), %r13
+ cfi_restore (%r13)
+ movq 1080(%rsp), %r14
+ cfi_restore (%r14)
+ movq 1072(%rsp), %r15
+ cfi_restore (%r15)
+ jmp .LBL_2_2
+
+.LBL_2_10: /* Odd element flagged.  */
+ cfi_restore_state
+ movzbl %r12b, %r15d
+ shlq $4, %r15 /* r15 = 16 * pair index.  */
+ vmovsd 1160(%rsp,%r15), %xmm0 /* Odd input of the pair (1152 + 8).  */
+ vzeroupper /* Zero the upper vector halves before calling the scalar routine.  */
+ vmovsd 1160(%rsp,%r15), %xmm0 /* Same load repeated after the vzeroupper.  */
+ call exp@PLT
+ vmovsd %xmm0, 1224(%rsp,%r15) /* Matching slot of the result (1216 + 8).  */
+ jmp .LBL_2_8
+
+.LBL_2_12: /* Even element flagged.  */
+ movzbl %r12b, %r15d
+ shlq $4, %r15 /* r15 = 16 * pair index.  */
+ vmovsd 1152(%rsp,%r15), %xmm0 /* Even input of the pair.  */
+ vzeroupper /* Zero the upper vector halves before calling the scalar routine.  */
+ vmovsd 1152(%rsp,%r15), %xmm0 /* Same load repeated after the vzeroupper.  */
+ call exp@PLT
+ vmovsd %xmm0, 1216(%rsp,%r15) /* Matching slot of the result.  */
+ jmp .LBL_2_7
+
+#endif
+END (_ZGVeN8v_exp_skx)
diff --git a/sysdeps/x86_64/fpu/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/svml_d_exp2_core.S
new file mode 100644
index 0000000..ca3dd76
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_exp2_core.S
@@ -0,0 +1,29 @@
+/* Function exp vectorized with SSE2.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVbN2v_exp) /* 2-lane double exp entry point; no code of its own in this file */
+WRAPPER_IMPL_SSE2 exp /* body is the svml_d_wrapper_impl.h macro, given the symbol exp (presumably the scalar routine -- confirm in that header) */
+END (_ZGVbN2v_exp)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVbN2v_exp)
+#endif
diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/svml_d_exp4_core.S
new file mode 100644
index 0000000..d497811
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_exp4_core.S
@@ -0,0 +1,29 @@
+/* Function exp vectorized with AVX2, wrapper version.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVdN4v_exp) /* 4-lane double exp entry point, wrapper version */
+WRAPPER_IMPL_AVX _ZGVbN2v_exp /* body is the svml_d_wrapper_impl.h macro, given the 2-lane entry point from svml_d_exp2_core.S */
+END (_ZGVdN4v_exp)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVdN4v_exp)
+#endif
diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
new file mode 100644
index 0000000..5dd2f6c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
@@ -0,0 +1,25 @@
+/* Function exp vectorized in AVX ISA as wrapper to SSE4 ISA version.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVcN4v_exp) /* 4-lane double exp entry point for the AVX ISA, wrapper version */
+WRAPPER_IMPL_AVX _ZGVbN2v_exp /* body is the svml_d_wrapper_impl.h macro, given the 2-lane entry point from svml_d_exp2_core.S */
+END (_ZGVcN4v_exp)
diff --git a/sysdeps/x86_64/fpu/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/svml_d_exp8_core.S
new file mode 100644
index 0000000..3e273a3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_exp8_core.S
@@ -0,0 +1,25 @@
+/* Function exp vectorized with AVX-512. Wrapper to AVX2 version.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+ .text
+ENTRY (_ZGVeN8v_exp) /* 8-lane double exp entry point, wrapper version */
+WRAPPER_IMPL_AVX512 _ZGVdN4v_exp /* body is the svml_d_wrapper_impl.h macro, given the 4-lane entry point from svml_d_exp4_core.S */
+END (_ZGVeN8v_exp)
diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.S b/sysdeps/x86_64/fpu/svml_d_exp_data.S
new file mode 100644
index 0000000..66fa3b8
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_exp_data.S
@@ -0,0 +1,1088 @@
+/* Data for vector function exp.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "svml_d_exp_data.h"
+
+ .section .rodata, "a"
+ .align 64
+
+/* Data table for vector implementations of function exp.
+ * The table may contain polynomial, reduction, lookup
+ * coefficients and other constants obtained through different
+ * methods of research and experimental work. */
+ .globl __svml_dexp_data
+__svml_dexp_data:
+
+/* Lookup table of 2^(j/2^k), j = 0 .. 2^k-1, k = 10 (1024 entries): */
+.if .-__svml_dexp_data != __dbT
+.err
+.endif
+ .quad 0x3ff0000000000000
+ .quad 0x3ff002c605e2e8cf
+ .quad 0x3ff0058c86da1c0a
+ .quad 0x3ff0085382faef83
+ .quad 0x3ff00b1afa5abcbf
+ .quad 0x3ff00de2ed0ee0f5
+ .quad 0x3ff010ab5b2cbd11
+ .quad 0x3ff0137444c9b5b5
+ .quad 0x3ff0163da9fb3335
+ .quad 0x3ff019078ad6a19f
+ .quad 0x3ff01bd1e77170b4
+ .quad 0x3ff01e9cbfe113ef
+ .quad 0x3ff02168143b0281
+ .quad 0x3ff02433e494b755
+ .quad 0x3ff027003103b10e
+ .quad 0x3ff029ccf99d720a
+ .quad 0x3ff02c9a3e778061
+ .quad 0x3ff02f67ffa765e6
+ .quad 0x3ff032363d42b027
+ .quad 0x3ff03504f75ef071
+ .quad 0x3ff037d42e11bbcc
+ .quad 0x3ff03aa3e170aafe
+ .quad 0x3ff03d7411915a8a
+ .quad 0x3ff04044be896ab6
+ .quad 0x3ff04315e86e7f85
+ .quad 0x3ff045e78f5640b9
+ .quad 0x3ff048b9b35659d8
+ .quad 0x3ff04b8c54847a28
+ .quad 0x3ff04e5f72f654b1
+ .quad 0x3ff051330ec1a03f
+ .quad 0x3ff0540727fc1762
+ .quad 0x3ff056dbbebb786b
+ .quad 0x3ff059b0d3158574
+ .quad 0x3ff05c866520045b
+ .quad 0x3ff05f5c74f0bec2
+ .quad 0x3ff06233029d8216
+ .quad 0x3ff0650a0e3c1f89
+ .quad 0x3ff067e197e26c14
+ .quad 0x3ff06ab99fa6407c
+ .quad 0x3ff06d92259d794d
+ .quad 0x3ff0706b29ddf6de
+ .quad 0x3ff07344ac7d9d51
+ .quad 0x3ff0761ead925493
+ .quad 0x3ff078f92d32085d
+ .quad 0x3ff07bd42b72a836
+ .quad 0x3ff07eafa86a2771
+ .quad 0x3ff0818ba42e7d30
+ .quad 0x3ff084681ed5a462
+ .quad 0x3ff0874518759bc8
+ .quad 0x3ff08a22912465f2
+ .quad 0x3ff08d0088f8093f
+ .quad 0x3ff08fdf00068fe2
+ .quad 0x3ff092bdf66607e0
+ .quad 0x3ff0959d6c2c830d
+ .quad 0x3ff0987d61701716
+ .quad 0x3ff09b5dd646dd77
+ .quad 0x3ff09e3ecac6f383
+ .quad 0x3ff0a1203f067a63
+ .quad 0x3ff0a402331b9715
+ .quad 0x3ff0a6e4a71c726e
+ .quad 0x3ff0a9c79b1f3919
+ .quad 0x3ff0acab0f3a1b9c
+ .quad 0x3ff0af8f03834e52
+ .quad 0x3ff0b27378110974
+ .quad 0x3ff0b5586cf9890f
+ .quad 0x3ff0b83de2530d11
+ .quad 0x3ff0bb23d833d93f
+ .quad 0x3ff0be0a4eb2353b
+ .quad 0x3ff0c0f145e46c85
+ .quad 0x3ff0c3d8bde0ce7a
+ .quad 0x3ff0c6c0b6bdae53
+ .quad 0x3ff0c9a93091632a
+ .quad 0x3ff0cc922b7247f7
+ .quad 0x3ff0cf7ba776bb94
+ .quad 0x3ff0d265a4b520ba
+ .quad 0x3ff0d5502343de02
+ .quad 0x3ff0d83b23395dec
+ .quad 0x3ff0db26a4ac0ed5
+ .quad 0x3ff0de12a7b26300
+ .quad 0x3ff0e0ff2c62d096
+ .quad 0x3ff0e3ec32d3d1a2
+ .quad 0x3ff0e6d9bb1be415
+ .quad 0x3ff0e9c7c55189c6
+ .quad 0x3ff0ecb6518b4874
+ .quad 0x3ff0efa55fdfa9c5
+ .quad 0x3ff0f294f0653b45
+ .quad 0x3ff0f58503328e6d
+ .quad 0x3ff0f875985e389b
+ .quad 0x3ff0fb66affed31b
+ .quad 0x3ff0fe584a2afb21
+ .quad 0x3ff1014a66f951ce
+ .quad 0x3ff1043d06807c2f
+ .quad 0x3ff1073028d7233e
+ .quad 0x3ff10a23ce13f3e2
+ .quad 0x3ff10d17f64d9ef1
+ .quad 0x3ff1100ca19ad92f
+ .quad 0x3ff11301d0125b51
+ .quad 0x3ff115f781cae1fa
+ .quad 0x3ff118edb6db2dc1
+ .quad 0x3ff11be46f5a032c
+ .quad 0x3ff11edbab5e2ab6
+ .quad 0x3ff121d36afe70c9
+ .quad 0x3ff124cbae51a5c8
+ .quad 0x3ff127c4756e9e05
+ .quad 0x3ff12abdc06c31cc
+ .quad 0x3ff12db78f613d5b
+ .quad 0x3ff130b1e264a0e9
+ .quad 0x3ff133acb98d40a2
+ .quad 0x3ff136a814f204ab
+ .quad 0x3ff139a3f4a9d922
+ .quad 0x3ff13ca058cbae1e
+ .quad 0x3ff13f9d416e77af
+ .quad 0x3ff1429aaea92de0
+ .quad 0x3ff14598a092ccb7
+ .quad 0x3ff1489717425438
+ .quad 0x3ff14b9612cec861
+ .quad 0x3ff14e95934f312e
+ .quad 0x3ff1519598da9a9a
+ .quad 0x3ff154962388149e
+ .quad 0x3ff15797336eb333
+ .quad 0x3ff15a98c8a58e51
+ .quad 0x3ff15d9ae343c1f2
+ .quad 0x3ff1609d83606e12
+ .quad 0x3ff163a0a912b6ac
+ .quad 0x3ff166a45471c3c2
+ .quad 0x3ff169a88594c157
+ .quad 0x3ff16cad3c92df73
+ .quad 0x3ff16fb279835224
+ .quad 0x3ff172b83c7d517b
+ .quad 0x3ff175be85981992
+ .quad 0x3ff178c554eaea89
+ .quad 0x3ff17bccaa8d0888
+ .quad 0x3ff17ed48695bbc0
+ .quad 0x3ff181dce91c506a
+ .quad 0x3ff184e5d23816c9
+ .quad 0x3ff187ef4200632b
+ .quad 0x3ff18af9388c8dea
+ .quad 0x3ff18e03b5f3f36b
+ .quad 0x3ff1910eba4df41f
+ .quad 0x3ff1941a45b1f487
+ .quad 0x3ff1972658375d2f
+ .quad 0x3ff19a32f1f59ab4
+ .quad 0x3ff19d4013041dc2
+ .quad 0x3ff1a04dbb7a5b13
+ .quad 0x3ff1a35beb6fcb75
+ .quad 0x3ff1a66aa2fbebc7
+ .quad 0x3ff1a979e2363cf8
+ .quad 0x3ff1ac89a936440d
+ .quad 0x3ff1af99f8138a1c
+ .quad 0x3ff1b2aacee59c53
+ .quad 0x3ff1b5bc2dc40bf0
+ .quad 0x3ff1b8ce14c66e4c
+ .quad 0x3ff1bbe084045cd4
+ .quad 0x3ff1bef37b95750b
+ .quad 0x3ff1c206fb91588f
+ .quad 0x3ff1c51b040fad15
+ .quad 0x3ff1c82f95281c6b
+ .quad 0x3ff1cb44aef2547a
+ .quad 0x3ff1ce5a51860746
+ .quad 0x3ff1d1707cfaeaed
+ .quad 0x3ff1d4873168b9aa
+ .quad 0x3ff1d79e6ee731d7
+ .quad 0x3ff1dab6358e15e8
+ .quad 0x3ff1ddce85752c71
+ .quad 0x3ff1e0e75eb44027
+ .quad 0x3ff1e400c1631fdb
+ .quad 0x3ff1e71aad999e82
+ .quad 0x3ff1ea35236f9330
+ .quad 0x3ff1ed5022fcd91d
+ .quad 0x3ff1f06bac594fa0
+ .quad 0x3ff1f387bf9cda38
+ .quad 0x3ff1f6a45cdf6085
+ .quad 0x3ff1f9c18438ce4d
+ .quad 0x3ff1fcdf35c1137a
+ .quad 0x3ff1fffd7190241e
+ .quad 0x3ff2031c37bdf872
+ .quad 0x3ff2063b88628cd6
+ .quad 0x3ff2095b6395e1d2
+ .quad 0x3ff20c7bc96ffc18
+ .quad 0x3ff20f9cba08e483
+ .quad 0x3ff212be3578a819
+ .quad 0x3ff215e03bd7580c
+ .quad 0x3ff21902cd3d09b9
+ .quad 0x3ff21c25e9c1d6aa
+ .quad 0x3ff21f49917ddc96
+ .quad 0x3ff2226dc4893d64
+ .quad 0x3ff2259282fc1f27
+ .quad 0x3ff228b7cceeac25
+ .quad 0x3ff22bdda27912d1
+ .quad 0x3ff22f0403b385d2
+ .quad 0x3ff2322af0b63bff
+ .quad 0x3ff2355269997062
+ .quad 0x3ff2387a6e756238
+ .quad 0x3ff23ba2ff6254f4
+ .quad 0x3ff23ecc1c78903a
+ .quad 0x3ff241f5c5d05fe6
+ .quad 0x3ff2451ffb82140a
+ .quad 0x3ff2484abda600ef
+ .quad 0x3ff24b760c547f15
+ .quad 0x3ff24ea1e7a5eb35
+ .quad 0x3ff251ce4fb2a63f
+ .quad 0x3ff254fb44931561
+ .quad 0x3ff25828c65fa1ff
+ .quad 0x3ff25b56d530b9bc
+ .quad 0x3ff25e85711ece75
+ .quad 0x3ff261b49a425645
+ .quad 0x3ff264e450b3cb82
+ .quad 0x3ff26814948bacc3
+ .quad 0x3ff26b4565e27cdd
+ .quad 0x3ff26e76c4d0c2e5
+ .quad 0x3ff271a8b16f0a30
+ .quad 0x3ff274db2bd5e254
+ .quad 0x3ff2780e341ddf29
+ .quad 0x3ff27b41ca5f98cb
+ .quad 0x3ff27e75eeb3ab98
+ .quad 0x3ff281aaa132b832
+ .quad 0x3ff284dfe1f56381
+ .quad 0x3ff28815b11456b1
+ .quad 0x3ff28b4c0ea83f36
+ .quad 0x3ff28e82fac9ceca
+ .quad 0x3ff291ba7591bb70
+ .quad 0x3ff294f27f18bf72
+ .quad 0x3ff2982b17779965
+ .quad 0x3ff29b643ec70c27
+ .quad 0x3ff29e9df51fdee1
+ .quad 0x3ff2a1d83a9add08
+ .quad 0x3ff2a5130f50d65c
+ .quad 0x3ff2a84e735a9eec
+ .quad 0x3ff2ab8a66d10f13
+ .quad 0x3ff2aec6e9cd037b
+ .quad 0x3ff2b203fc675d1f
+ .quad 0x3ff2b5419eb90148
+ .quad 0x3ff2b87fd0dad990
+ .quad 0x3ff2bbbe92e5d3e3
+ .quad 0x3ff2befde4f2e280
+ .quad 0x3ff2c23dc71afbf7
+ .quad 0x3ff2c57e39771b2f
+ .quad 0x3ff2c8bf3c203f5f
+ .quad 0x3ff2cc00cf2f6c18
+ .quad 0x3ff2cf42f2bda93d
+ .quad 0x3ff2d285a6e4030b
+ .quad 0x3ff2d5c8ebbb8a15
+ .quad 0x3ff2d90cc15d5346
+ .quad 0x3ff2dc5127e277e3
+ .quad 0x3ff2df961f641589
+ .quad 0x3ff2e2dba7fb4e33
+ .quad 0x3ff2e621c1c14833
+ .quad 0x3ff2e9686ccf2e3b
+ .quad 0x3ff2ecafa93e2f56
+ .quad 0x3ff2eff777277ef0
+ .quad 0x3ff2f33fd6a454d2
+ .quad 0x3ff2f688c7cded23
+ .quad 0x3ff2f9d24abd886b
+ .quad 0x3ff2fd1c5f8c6b93
+ .quad 0x3ff300670653dfe4
+ .quad 0x3ff303b23f2d330b
+ .quad 0x3ff306fe0a31b715
+ .quad 0x3ff30a4a677ac276
+ .quad 0x3ff30d975721b004
+ .quad 0x3ff310e4d93fdefb
+ .quad 0x3ff31432edeeb2fd
+ .quad 0x3ff3178195479413
+ .quad 0x3ff31ad0cf63eeac
+ .quad 0x3ff31e209c5d33a0
+ .quad 0x3ff32170fc4cd831
+ .quad 0x3ff324c1ef4c560a
+ .quad 0x3ff3281375752b40
+ .quad 0x3ff32b658ee0da54
+ .quad 0x3ff32eb83ba8ea32
+ .quad 0x3ff3320b7be6e633
+ .quad 0x3ff3355f4fb45e20
+ .quad 0x3ff338b3b72ae62d
+ .quad 0x3ff33c08b26416ff
+ .quad 0x3ff33f5e41798daa
+ .quad 0x3ff342b46484ebb4
+ .quad 0x3ff3460b1b9fd712
+ .quad 0x3ff3496266e3fa2d
+ .quad 0x3ff34cba466b03e1
+ .quad 0x3ff35012ba4ea77d
+ .quad 0x3ff3536bc2a89cc4
+ .quad 0x3ff356c55f929ff1
+ .quad 0x3ff35a1f912671b1
+ .quad 0x3ff35d7a577dd72b
+ .quad 0x3ff360d5b2b299fc
+ .quad 0x3ff36431a2de883b
+ .quad 0x3ff3678e281b7475
+ .quad 0x3ff36aeb428335b4
+ .quad 0x3ff36e48f22fa77c
+ .quad 0x3ff371a7373aa9cb
+ .quad 0x3ff3750611be211c
+ .quad 0x3ff3786581d3f669
+ .quad 0x3ff37bc587961726
+ .quad 0x3ff37f26231e754a
+ .quad 0x3ff3828754870746
+ .quad 0x3ff385e91be9c811
+ .quad 0x3ff3894b7960b71f
+ .quad 0x3ff38cae6d05d866
+ .quad 0x3ff39011f6f3345f
+ .quad 0x3ff393761742d808
+ .quad 0x3ff396dace0ed4e1
+ .quad 0x3ff39a401b7140ef
+ .quad 0x3ff39da5ff8436bc
+ .quad 0x3ff3a10c7a61d55b
+ .quad 0x3ff3a4738c244064
+ .quad 0x3ff3a7db34e59ff7
+ .quad 0x3ff3ab4374c020bd
+ .quad 0x3ff3aeac4bcdf3ea
+ .quad 0x3ff3b215ba294f39
+ .quad 0x3ff3b57fbfec6cf4
+ .quad 0x3ff3b8ea5d318bef
+ .quad 0x3ff3bc559212ef89
+ .quad 0x3ff3bfc15eaadfb1
+ .quad 0x3ff3c32dc313a8e5
+ .quad 0x3ff3c69abf679c2e
+ .quad 0x3ff3ca0853c10f28
+ .quad 0x3ff3cd76803a5c00
+ .quad 0x3ff3d0e544ede173
+ .quad 0x3ff3d454a1f602d0
+ .quad 0x3ff3d7c4976d27fa
+ .quad 0x3ff3db35256dbd67
+ .quad 0x3ff3dea64c123422
+ .quad 0x3ff3e2180b7501cc
+ .quad 0x3ff3e58a63b0a09b
+ .quad 0x3ff3e8fd54df8f5c
+ .quad 0x3ff3ec70df1c5175
+ .quad 0x3ff3efe502816ee3
+ .quad 0x3ff3f359bf29743f
+ .quad 0x3ff3f6cf152ef2b8
+ .quad 0x3ff3fa4504ac801c
+ .quad 0x3ff3fdbb8dbcb6d2
+ .quad 0x3ff40132b07a35df
+ .quad 0x3ff404aa6cffa0e5
+ .quad 0x3ff40822c367a024
+ .quad 0x3ff40b9bb3cce07c
+ .quad 0x3ff40f153e4a136a
+ .quad 0x3ff4128f62f9ef0e
+ .quad 0x3ff4160a21f72e2a
+ .quad 0x3ff419857b5c901f
+ .quad 0x3ff41d016f44d8f5
+ .quad 0x3ff4207dfdcad153
+ .quad 0x3ff423fb2709468a
+ .quad 0x3ff42778eb1b0a8b
+ .quad 0x3ff42af74a1af3f1
+ .quad 0x3ff42e764423ddfd
+ .quad 0x3ff431f5d950a897
+ .quad 0x3ff4357609bc3850
+ .quad 0x3ff438f6d5817663
+ .quad 0x3ff43c783cbb50b4
+ .quad 0x3ff43ffa3f84b9d4
+ .quad 0x3ff4437cddf8a8fe
+ .quad 0x3ff4470018321a1a
+ .quad 0x3ff44a83ee4c0dbd
+ .quad 0x3ff44e086061892d
+ .quad 0x3ff4518d6e8d965b
+ .quad 0x3ff4551318eb43ec
+ .quad 0x3ff458995f95a532
+ .quad 0x3ff45c2042a7d232
+ .quad 0x3ff45fa7c23ce7a4
+ .quad 0x3ff4632fde7006f4
+ .quad 0x3ff466b8975c563e
+ .quad 0x3ff46a41ed1d0057
+ .quad 0x3ff46dcbdfcd34c8
+ .quad 0x3ff471566f8827d0
+ .quad 0x3ff474e19c691265
+ .quad 0x3ff4786d668b3237
+ .quad 0x3ff47bf9ce09c9ab
+ .quad 0x3ff47f86d3001fe5
+ .quad 0x3ff48314758980bf
+ .quad 0x3ff486a2b5c13cd0
+ .quad 0x3ff48a3193c2a96c
+ .quad 0x3ff48dc10fa920a1
+ .quad 0x3ff491512990013f
+ .quad 0x3ff494e1e192aed2
+ .quad 0x3ff4987337cc91a5
+ .quad 0x3ff49c052c5916c4
+ .quad 0x3ff49f97bf53affd
+ .quad 0x3ff4a32af0d7d3de
+ .quad 0x3ff4a6bec100fdba
+ .quad 0x3ff4aa532feaada6
+ .quad 0x3ff4ade83db0687a
+ .quad 0x3ff4b17dea6db7d7
+ .quad 0x3ff4b514363e2a20
+ .quad 0x3ff4b8ab213d5283
+ .quad 0x3ff4bc42ab86c8f1
+ .quad 0x3ff4bfdad5362a27
+ .quad 0x3ff4c3739e6717aa
+ .quad 0x3ff4c70d073537ca
+ .quad 0x3ff4caa70fbc35a1
+ .quad 0x3ff4ce41b817c114
+ .quad 0x3ff4d1dd00638ed8
+ .quad 0x3ff4d578e8bb586b
+ .quad 0x3ff4d915713adc1e
+ .quad 0x3ff4dcb299fddd0d
+ .quad 0x3ff4e05063202327
+ .quad 0x3ff4e3eeccbd7b2a
+ .quad 0x3ff4e78dd6f1b6a6
+ .quad 0x3ff4eb2d81d8abff
+ .quad 0x3ff4eecdcd8e3669
+ .quad 0x3ff4f26eba2e35f0
+ .quad 0x3ff4f61047d48f73
+ .quad 0x3ff4f9b2769d2ca7
+ .quad 0x3ff4fd5546a3fc17
+ .quad 0x3ff500f8b804f127
+ .quad 0x3ff5049ccadc0412
+ .quad 0x3ff508417f4531ee
+ .quad 0x3ff50be6d55c7ca9
+ .quad 0x3ff50f8ccd3deb0d
+ .quad 0x3ff51333670588bf
+ .quad 0x3ff516daa2cf6642
+ .quad 0x3ff51a8280b798f4
+ .quad 0x3ff51e2b00da3b14
+ .quad 0x3ff521d423536bbe
+ .quad 0x3ff5257de83f4eef
+ .quad 0x3ff529284fba0d84
+ .quad 0x3ff52cd359dfd53d
+ .quad 0x3ff5307f06ccd8ba
+ .quad 0x3ff5342b569d4f82
+ .quad 0x3ff537d8496d75fc
+ .quad 0x3ff53b85df598d78
+ .quad 0x3ff53f34187ddc28
+ .quad 0x3ff542e2f4f6ad27
+ .quad 0x3ff5469274e05078
+ .quad 0x3ff54a4298571b06
+ .quad 0x3ff54df35f7766a3
+ .quad 0x3ff551a4ca5d920f
+ .quad 0x3ff55556d92600f1
+ .quad 0x3ff559098bed1bdf
+ .quad 0x3ff55cbce2cf505b
+ .quad 0x3ff56070dde910d2
+ .quad 0x3ff564257d56d4a2
+ .quad 0x3ff567dac1351819
+ .quad 0x3ff56b90a9a05c72
+ .quad 0x3ff56f4736b527da
+ .quad 0x3ff572fe68900573
+ .quad 0x3ff576b63f4d854c
+ .quad 0x3ff57a6ebb0a3c6d
+ .quad 0x3ff57e27dbe2c4cf
+ .quad 0x3ff581e1a1f3bd60
+ .quad 0x3ff5859c0d59ca07
+ .quad 0x3ff589571e31939f
+ .quad 0x3ff58d12d497c7fd
+ .quad 0x3ff590cf30a919ed
+ .quad 0x3ff5948c32824135
+ .quad 0x3ff59849da3ffa96
+ .quad 0x3ff59c0827ff07cc
+ .quad 0x3ff59fc71bdc2f8e
+ .quad 0x3ff5a386b5f43d92
+ .quad 0x3ff5a746f664028b
+ .quad 0x3ff5ab07dd485429
+ .quad 0x3ff5aec96abe0d1f
+ .quad 0x3ff5b28b9ee20d1e
+ .quad 0x3ff5b64e79d138d8
+ .quad 0x3ff5ba11fba87a03
+ .quad 0x3ff5bdd62484bf56
+ .quad 0x3ff5c19af482fc8f
+ .quad 0x3ff5c5606bc02a6d
+ .quad 0x3ff5c9268a5946b7
+ .quad 0x3ff5cced506b543a
+ .quad 0x3ff5d0b4be135acc
+ .quad 0x3ff5d47cd36e6747
+ .quad 0x3ff5d84590998b93
+ .quad 0x3ff5dc0ef5b1de9e
+ .quad 0x3ff5dfd902d47c65
+ .quad 0x3ff5e3a3b81e85ec
+ .quad 0x3ff5e76f15ad2148
+ .quad 0x3ff5eb3b1b9d799a
+ .quad 0x3ff5ef07ca0cbf0f
+ .quad 0x3ff5f2d5211826e8
+ .quad 0x3ff5f6a320dceb71
+ .quad 0x3ff5fa71c9784c0b
+ .quad 0x3ff5fe411b078d26
+ .quad 0x3ff6021115a7f849
+ .quad 0x3ff605e1b976dc09
+ .quad 0x3ff609b306918c13
+ .quad 0x3ff60d84fd15612a
+ .quad 0x3ff611579d1fb925
+ .quad 0x3ff6152ae6cdf6f4
+ .quad 0x3ff618feda3d829f
+ .quad 0x3ff61cd3778bc944
+ .quad 0x3ff620a8bed63d1f
+ .quad 0x3ff6247eb03a5585
+ .quad 0x3ff628554bd58ee5
+ .quad 0x3ff62c2c91c56acd
+ .quad 0x3ff6300482276fe8
+ .quad 0x3ff633dd1d1929fd
+ .quad 0x3ff637b662b829f5
+ .quad 0x3ff63b90532205d8
+ .quad 0x3ff63f6aee7458cd
+ .quad 0x3ff6434634ccc320
+ .quad 0x3ff647222648ea3d
+ .quad 0x3ff64afec30678b7
+ .quad 0x3ff64edc0b231e41
+ .quad 0x3ff652b9febc8fb7
+ .quad 0x3ff656989df08719
+ .quad 0x3ff65a77e8dcc390
+ .quad 0x3ff65e57df9f096b
+ .quad 0x3ff6623882552225
+ .quad 0x3ff66619d11cdc5f
+ .quad 0x3ff669fbcc140be7
+ .quad 0x3ff66dde735889b8
+ .quad 0x3ff671c1c70833f6
+ .quad 0x3ff675a5c740edf5
+ .quad 0x3ff6798a7420a036
+ .quad 0x3ff67d6fcdc5386a
+ .quad 0x3ff68155d44ca973
+ .quad 0x3ff6853c87d4eb62
+ .quad 0x3ff68923e87bfb7a
+ .quad 0x3ff68d0bf65fdc34
+ .quad 0x3ff690f4b19e9538
+ .quad 0x3ff694de1a563367
+ .quad 0x3ff698c830a4c8d4
+ .quad 0x3ff69cb2f4a86cca
+ .quad 0x3ff6a09e667f3bcd
+ .quad 0x3ff6a48a86475795
+ .quad 0x3ff6a877541ee718
+ .quad 0x3ff6ac64d0241683
+ .quad 0x3ff6b052fa75173e
+ .quad 0x3ff6b441d3301fee
+ .quad 0x3ff6b8315a736c75
+ .quad 0x3ff6bc21905d3df0
+ .quad 0x3ff6c012750bdabf
+ .quad 0x3ff6c404089d8e7d
+ .quad 0x3ff6c7f64b30aa09
+ .quad 0x3ff6cbe93ce38381
+ .quad 0x3ff6cfdcddd47645
+ .quad 0x3ff6d3d12e21e2fb
+ .quad 0x3ff6d7c62dea2f8a
+ .quad 0x3ff6dbbbdd4bc720
+ .quad 0x3ff6dfb23c651a2f
+ .quad 0x3ff6e3a94b549e71
+ .quad 0x3ff6e7a10a38cee8
+ .quad 0x3ff6eb9979302bdd
+ .quad 0x3ff6ef9298593ae5
+ .quad 0x3ff6f38c67d286dd
+ .quad 0x3ff6f786e7ba9fef
+ .quad 0x3ff6fb8218301b90
+ .quad 0x3ff6ff7df9519484
+ .quad 0x3ff7037a8b3daadb
+ .quad 0x3ff70777ce1303f6
+ .quad 0x3ff70b75c1f04a84
+ .quad 0x3ff70f7466f42e87
+ .quad 0x3ff71373bd3d6551
+ .quad 0x3ff71773c4eaa988
+ .quad 0x3ff71b747e1abb24
+ .quad 0x3ff71f75e8ec5f74
+ .quad 0x3ff72378057e611a
+ .quad 0x3ff7277ad3ef9011
+ .quad 0x3ff72b7e545ec1a8
+ .quad 0x3ff72f8286ead08a
+ .quad 0x3ff733876bb29cb8
+ .quad 0x3ff7378d02d50b8f
+ .quad 0x3ff73b934c7107c7
+ .quad 0x3ff73f9a48a58174
+ .quad 0x3ff743a1f7916e05
+ .quad 0x3ff747aa5953c849
+ .quad 0x3ff74bb36e0b906d
+ .quad 0x3ff74fbd35d7cbfd
+ .quad 0x3ff753c7b0d785e8
+ .quad 0x3ff757d2df29ce7c
+ .quad 0x3ff75bdec0edbb6b
+ .quad 0x3ff75feb564267c9
+ .quad 0x3ff763f89f46f40f
+ .quad 0x3ff768069c1a861d
+ .quad 0x3ff76c154cdc4937
+ .quad 0x3ff77024b1ab6e09
+ .quad 0x3ff77434caa72aa7
+ .quad 0x3ff7784597eeba8f
+ .quad 0x3ff77c5719a15ea6
+ .quad 0x3ff780694fde5d3f
+ .quad 0x3ff7847c3ac50219
+ .quad 0x3ff7888fda749e5d
+ .quad 0x3ff78ca42f0c88a5
+ .quad 0x3ff790b938ac1cf6
+ .quad 0x3ff794cef772bcc9
+ .quad 0x3ff798e56b7fcf03
+ .quad 0x3ff79cfc94f2bfff
+ .quad 0x3ff7a11473eb0187
+ .quad 0x3ff7a52d08880ad9
+ .quad 0x3ff7a94652e958aa
+ .quad 0x3ff7ad60532e6d20
+ .quad 0x3ff7b17b0976cfdb
+ .quad 0x3ff7b59675e20def
+ .quad 0x3ff7b9b2988fb9ec
+ .quad 0x3ff7bdcf719f6bd7
+ .quad 0x3ff7c1ed0130c132
+ .quad 0x3ff7c60b47635cf9
+ .quad 0x3ff7ca2a4456e7a3
+ .quad 0x3ff7ce49f82b0f24
+ .quad 0x3ff7d26a62ff86f0
+ .quad 0x3ff7d68b84f407f8
+ .quad 0x3ff7daad5e2850ac
+ .quad 0x3ff7decfeebc24fe
+ .quad 0x3ff7e2f336cf4e62
+ .quad 0x3ff7e71736819bcd
+ .quad 0x3ff7eb3bedf2e1b9
+ .quad 0x3ff7ef615d42fa24
+ .quad 0x3ff7f3878491c491
+ .quad 0x3ff7f7ae63ff260a
+ .quad 0x3ff7fbd5fbab091f
+ .quad 0x3ff7fffe4bb55dec
+ .quad 0x3ff80427543e1a12
+ .quad 0x3ff80851156538be
+ .quad 0x3ff80c7b8f4abaa9
+ .quad 0x3ff810a6c20ea617
+ .quad 0x3ff814d2add106d9
+ .quad 0x3ff818ff52b1ee50
+ .quad 0x3ff81d2cb0d1736a
+ .quad 0x3ff8215ac84fb2a6
+ .quad 0x3ff82589994cce13
+ .quad 0x3ff829b923e8ed53
+ .quad 0x3ff82de968443d9a
+ .quad 0x3ff8321a667ef1b2
+ .quad 0x3ff8364c1eb941f7
+ .quad 0x3ff83a7e91136c5d
+ .quad 0x3ff83eb1bdadb46d
+ .quad 0x3ff842e5a4a8634a
+ .quad 0x3ff8471a4623c7ad
+ .quad 0x3ff84b4fa24035ea
+ .quad 0x3ff84f85b91e07f1
+ .quad 0x3ff853bc8add9d4c
+ .quad 0x3ff857f4179f5b21
+ .quad 0x3ff85c2c5f83ac35
+ .quad 0x3ff8606562ab00ec
+ .quad 0x3ff8649f2135cf48
+ .quad 0x3ff868d99b4492ed
+ .quad 0x3ff86d14d0f7cd1d
+ .quad 0x3ff87150c27004c2
+ .quad 0x3ff8758d6fcdc666
+ .quad 0x3ff879cad931a436
+ .quad 0x3ff87e08febc3608
+ .quad 0x3ff88247e08e1957
+ .quad 0x3ff886877ec7f144
+ .quad 0x3ff88ac7d98a6699
+ .quad 0x3ff88f08f0f627cb
+ .quad 0x3ff8934ac52be8f7
+ .quad 0x3ff8978d564c63e7
+ .quad 0x3ff89bd0a478580f
+ .quad 0x3ff8a014afd08a94
+ .quad 0x3ff8a4597875c644
+ .quad 0x3ff8a89efe88dba1
+ .quad 0x3ff8ace5422aa0db
+ .quad 0x3ff8b12c437bf1d4
+ .quad 0x3ff8b574029db01e
+ .quad 0x3ff8b9bc7fb0c302
+ .quad 0x3ff8be05bad61778
+ .quad 0x3ff8c24fb42ea033
+ .quad 0x3ff8c69a6bdb5598
+ .quad 0x3ff8cae5e1fd35c4
+ .quad 0x3ff8cf3216b5448c
+ .quad 0x3ff8d37f0a248b7f
+ .quad 0x3ff8d7ccbc6c19e6
+ .quad 0x3ff8dc1b2dad04c4
+ .quad 0x3ff8e06a5e0866d9
+ .quad 0x3ff8e4ba4d9f60a1
+ .quad 0x3ff8e90afc931857
+ .quad 0x3ff8ed5c6b04b9f6
+ .quad 0x3ff8f1ae99157736
+ .quad 0x3ff8f60186e68793
+ .quad 0x3ff8fa553499284b
+ .quad 0x3ff8fea9a24e9c5c
+ .quad 0x3ff902fed0282c8a
+ .quad 0x3ff90754be472760
+ .quad 0x3ff90bab6ccce12c
+ .quad 0x3ff91002dbdab403
+ .quad 0x3ff9145b0b91ffc6
+ .quad 0x3ff918b3fc142a19
+ .quad 0x3ff91d0dad829e70
+ .quad 0x3ff921681ffece05
+ .quad 0x3ff925c353aa2fe2
+ .quad 0x3ff92a1f48a640dc
+ .quad 0x3ff92e7bff148396
+ .quad 0x3ff932d977168083
+ .quad 0x3ff93737b0cdc5e5
+ .quad 0x3ff93b96ac5be7d1
+ .quad 0x3ff93ff669e2802b
+ .quad 0x3ff94456e9832ead
+ .quad 0x3ff948b82b5f98e5
+ .quad 0x3ff94d1a2f996a33
+ .quad 0x3ff9517cf65253d1
+ .quad 0x3ff955e07fac0ccd
+ .quad 0x3ff95a44cbc8520f
+ .quad 0x3ff95ea9dac8e658
+ .quad 0x3ff9630faccf9243
+ .quad 0x3ff9677641fe2446
+ .quad 0x3ff96bdd9a7670b3
+ .quad 0x3ff97045b65a51ba
+ .quad 0x3ff974ae95cba768
+ .quad 0x3ff9791838ec57ab
+ .quad 0x3ff97d829fde4e50
+ .quad 0x3ff981edcac37d05
+ .quad 0x3ff98659b9bddb5b
+ .quad 0x3ff98ac66cef66c8
+ .quad 0x3ff98f33e47a22a2
+ .quad 0x3ff993a220801829
+ .quad 0x3ff9981121235681
+ .quad 0x3ff99c80e685f2b5
+ .quad 0x3ff9a0f170ca07ba
+ .quad 0x3ff9a562c011b66d
+ .quad 0x3ff9a9d4d47f2598
+ .quad 0x3ff9ae47ae3481ed
+ .quad 0x3ff9b2bb4d53fe0d
+ .quad 0x3ff9b72fb1ffd285
+ .quad 0x3ff9bba4dc5a3dd3
+ .quad 0x3ff9c01acc858463
+ .quad 0x3ff9c49182a3f090
+ .quad 0x3ff9c908fed7d2aa
+ .quad 0x3ff9cd81414380f2
+ .quad 0x3ff9d1fa4a09579d
+ .quad 0x3ff9d674194bb8d5
+ .quad 0x3ff9daeeaf2d0cb8
+ .quad 0x3ff9df6a0bcfc15e
+ .quad 0x3ff9e3e62f564ad5
+ .quad 0x3ff9e86319e32323
+ .quad 0x3ff9ece0cb98ca4b
+ .quad 0x3ff9f15f4499c647
+ .quad 0x3ff9f5de8508a311
+ .quad 0x3ff9fa5e8d07f29e
+ .quad 0x3ff9fedf5cba4ce0
+ .quad 0x3ffa0360f4424fcb
+ .quad 0x3ffa07e353c29f50
+ .quad 0x3ffa0c667b5de565
+ .quad 0x3ffa10ea6b36d1fe
+ .quad 0x3ffa156f23701b15
+ .quad 0x3ffa19f4a42c7ca9
+ .quad 0x3ffa1e7aed8eb8bb
+ .quad 0x3ffa2301ffb99757
+ .quad 0x3ffa2789dacfe68c
+ .quad 0x3ffa2c127ef47a74
+ .quad 0x3ffa309bec4a2d33
+ .quad 0x3ffa352622f3def6
+ .quad 0x3ffa39b1231475f7
+ .quad 0x3ffa3e3ceccede7c
+ .quad 0x3ffa42c980460ad8
+ .quad 0x3ffa4756dd9cf36e
+ .quad 0x3ffa4be504f696b1
+ .quad 0x3ffa5073f675f924
+ .quad 0x3ffa5503b23e255d
+ .quad 0x3ffa599438722c03
+ .quad 0x3ffa5e25893523d4
+ .quad 0x3ffa62b7a4aa29a1
+ .quad 0x3ffa674a8af46052
+ .quad 0x3ffa6bde3c36f0e6
+ .quad 0x3ffa7072b8950a73
+ .quad 0x3ffa75080031e22b
+ .quad 0x3ffa799e1330b358
+ .quad 0x3ffa7e34f1b4bf62
+ .quad 0x3ffa82cc9be14dca
+ .quad 0x3ffa876511d9ac32
+ .quad 0x3ffa8bfe53c12e59
+ .quad 0x3ffa909861bb2e1d
+ .quad 0x3ffa95333beb0b7e
+ .quad 0x3ffa99cee2742c9d
+ .quad 0x3ffa9e6b5579fdbf
+ .quad 0x3ffaa308951ff14d
+ .quad 0x3ffaa7a6a1897fd2
+ .quad 0x3ffaac457ada2803
+ .quad 0x3ffab0e521356eba
+ .quad 0x3ffab58594bedefa
+ .quad 0x3ffaba26d59a09ee
+ .quad 0x3ffabec8e3ea86ee
+ .quad 0x3ffac36bbfd3f37a
+ .quad 0x3ffac80f6979f340
+ .quad 0x3ffaccb3e100301e
+ .quad 0x3ffad159268a5a1c
+ .quad 0x3ffad5ff3a3c2774
+ .quad 0x3ffadaa61c395493
+ .quad 0x3ffadf4dcca5a413
+ .quad 0x3ffae3f64ba4dec6
+ .quad 0x3ffae89f995ad3ad
+ .quad 0x3ffaed49b5eb5803
+ .quad 0x3ffaf1f4a17a4735
+ .quad 0x3ffaf6a05c2b82e9
+ .quad 0x3ffafb4ce622f2ff
+ .quad 0x3ffafffa3f84858c
+ .quad 0x3ffb04a868742ee4
+ .quad 0x3ffb09576115e994
+ .quad 0x3ffb0e07298db666
+ .quad 0x3ffb12b7c1ff9c61
+ .quad 0x3ffb17692a8fa8cd
+ .quad 0x3ffb1c1b6361ef31
+ .quad 0x3ffb20ce6c9a8952
+ .quad 0x3ffb2582465d973c
+ .quad 0x3ffb2a36f0cf3f3a
+ .quad 0x3ffb2eec6c13addd
+ .quad 0x3ffb33a2b84f15fb
+ .quad 0x3ffb3859d5a5b0b1
+ .quad 0x3ffb3d11c43bbd62
+ .quad 0x3ffb41ca843581ba
+ .quad 0x3ffb468415b749b1
+ .quad 0x3ffb4b3e78e56786
+ .quad 0x3ffb4ff9ade433c6
+ .quad 0x3ffb54b5b4d80d4a
+ .quad 0x3ffb59728de5593a
+ .quad 0x3ffb5e303930830c
+ .quad 0x3ffb62eeb6ddfc87
+ .quad 0x3ffb67ae07123dc3
+ .quad 0x3ffb6c6e29f1c52a
+ .quad 0x3ffb712f1fa1177b
+ .quad 0x3ffb75f0e844bfc6
+ .quad 0x3ffb7ab384014f76
+ .quad 0x3ffb7f76f2fb5e47
+ .quad 0x3ffb843b35578a51
+ .quad 0x3ffb89004b3a7804
+ .quad 0x3ffb8dc634c8d228
+ .quad 0x3ffb928cf22749e4
+ .quad 0x3ffb9754837a96b7
+ .quad 0x3ffb9c1ce8e77680
+ .quad 0x3ffba0e62292ad7d
+ .quad 0x3ffba5b030a1064a
+ .quad 0x3ffbaa7b133751e3
+ .quad 0x3ffbaf46ca7a67a7
+ .quad 0x3ffbb413568f255a
+ .quad 0x3ffbb8e0b79a6f1f
+ .quad 0x3ffbbdaeedc12f82
+ .quad 0x3ffbc27df9285775
+ .quad 0x3ffbc74dd9f4de4f
+ .quad 0x3ffbcc1e904bc1d2
+ .quad 0x3ffbd0f01c520628
+ .quad 0x3ffbd5c27e2cb5e5
+ .quad 0x3ffbda95b600e20b
+ .quad 0x3ffbdf69c3f3a207
+ .quad 0x3ffbe43ea82a13b5
+ .quad 0x3ffbe91462c95b60
+ .quad 0x3ffbedeaf3f6a3c2
+ .quad 0x3ffbf2c25bd71e09
+ .quad 0x3ffbf79a9a9001d2
+ .quad 0x3ffbfc73b0468d30
+ .quad 0x3ffc014d9d2004aa
+ .quad 0x3ffc06286141b33d
+ .quad 0x3ffc0b03fcd0ea5c
+ .quad 0x3ffc0fe06ff301f4
+ .quad 0x3ffc14bdbacd586a
+ .quad 0x3ffc199bdd85529c
+ .quad 0x3ffc1e7ad8405be6
+ .quad 0x3ffc235aab23e61e
+ .quad 0x3ffc283b56556999
+ .quad 0x3ffc2d1cd9fa652c
+ .quad 0x3ffc31ff36385e29
+ .quad 0x3ffc36e26b34e065
+ .quad 0x3ffc3bc679157e38
+ .quad 0x3ffc40ab5fffd07a
+ .quad 0x3ffc45912019768c
+ .quad 0x3ffc4a77b9881650
+ .quad 0x3ffc4f5f2c715c31
+ .quad 0x3ffc544778fafb22
+ .quad 0x3ffc59309f4aac9f
+ .quad 0x3ffc5e1a9f8630ad
+ .quad 0x3ffc630579d34ddd
+ .quad 0x3ffc67f12e57d14b
+ .quad 0x3ffc6cddbd398ea4
+ .quad 0x3ffc71cb269e601f
+ .quad 0x3ffc76b96aac2686
+ .quad 0x3ffc7ba88988c933
+ .quad 0x3ffc8098835a3611
+ .quad 0x3ffc8589584661a1
+ .quad 0x3ffc8a7b087346f4
+ .quad 0x3ffc8f6d9406e7b5
+ .quad 0x3ffc9460fb274c22
+ .quad 0x3ffc99553dfa8313
+ .quad 0x3ffc9e4a5ca6a1f8
+ .quad 0x3ffca3405751c4db
+ .quad 0x3ffca8372e220e61
+ .quad 0x3ffcad2ee13da7cb
+ .quad 0x3ffcb22770cac0f9
+ .quad 0x3ffcb720dcef9069
+ .quad 0x3ffcbc1b25d25337
+ .quad 0x3ffcc1164b994d23
+ .quad 0x3ffcc6124e6ac88b
+ .quad 0x3ffccb0f2e6d1675
+ .quad 0x3ffcd00cebc68e87
+ .quad 0x3ffcd50b869d8f0f
+ .quad 0x3ffcda0aff187d02
+ .quad 0x3ffcdf0b555dc3fa
+ .quad 0x3ffce40c8993d63d
+ .quad 0x3ffce90e9be12cb9
+ .quad 0x3ffcee118c6c4709
+ .quad 0x3ffcf3155b5bab74
+ .quad 0x3ffcf81a08d5e6ec
+ .quad 0x3ffcfd1f95018d17
+ .quad 0x3ffd022600053845
+ .quad 0x3ffd072d4a07897c
+ .quad 0x3ffd0c35732f2870
+ .quad 0x3ffd113e7ba2c38c
+ .quad 0x3ffd164863890fee
+ .quad 0x3ffd1b532b08c968
+ .quad 0x3ffd205ed248b287
+ .quad 0x3ffd256b596f948c
+ .quad 0x3ffd2a78c0a43f72
+ .quad 0x3ffd2f87080d89f2
+ .quad 0x3ffd34962fd2517a
+ .quad 0x3ffd39a638197a3c
+ .quad 0x3ffd3eb72109ef21
+ .quad 0x3ffd43c8eacaa1d6
+ .quad 0x3ffd48db95828ac7
+ .quad 0x3ffd4def2158a91f
+ .quad 0x3ffd53038e7402ce
+ .quad 0x3ffd5818dcfba487
+ .quad 0x3ffd5d2f0d16a1c3
+ .quad 0x3ffd62461eec14be
+ .quad 0x3ffd675e12a31e7f
+ .quad 0x3ffd6c76e862e6d3
+ .quad 0x3ffd7190a0529c51
+ .quad 0x3ffd76ab3a99745b
+ .quad 0x3ffd7bc6b75eab1f
+ .quad 0x3ffd80e316c98398
+ .quad 0x3ffd86005901478f
+ .quad 0x3ffd8b1e7e2d479d
+ .quad 0x3ffd903d8674db2b
+ .quad 0x3ffd955d71ff6075
+ .quad 0x3ffd9a7e40f43c89
+ .quad 0x3ffd9f9ff37adb4a
+ .quad 0x3ffda4c289baaf6e
+ .quad 0x3ffda9e603db3285
+ .quad 0x3ffdaf0a6203e4f5
+ .quad 0x3ffdb42fa45c4dfd
+ .quad 0x3ffdb955cb0bfbb6
+ .quad 0x3ffdbe7cd63a8315
+ .quad 0x3ffdc3a4c60f7fea
+ .quad 0x3ffdc8cd9ab294e4
+ .quad 0x3ffdcdf7544b6b92
+ .quad 0x3ffdd321f301b460
+ .quad 0x3ffdd84d76fd269e
+ .quad 0x3ffddd79e065807d
+ .quad 0x3ffde2a72f628712
+ .quad 0x3ffde7d5641c0658
+ .quad 0x3ffded047eb9d12d
+ .quad 0x3ffdf2347f63c159
+ .quad 0x3ffdf7656641b78c
+ .quad 0x3ffdfc97337b9b5f
+ .quad 0x3ffe01c9e7395b56
+ .quad 0x3ffe06fd81a2ece1
+ .quad 0x3ffe0c3202e04c5d
+ .quad 0x3ffe11676b197d17
+ .quad 0x3ffe169dba768949
+ .quad 0x3ffe1bd4f11f8220
+ .quad 0x3ffe210d0f3c7fba
+ .quad 0x3ffe264614f5a129
+ .quad 0x3ffe2b8002730c71
+ .quad 0x3ffe30bad7dcee90
+ .quad 0x3ffe35f6955b7b78
+ .quad 0x3ffe3b333b16ee12
+ .quad 0x3ffe4070c9378842
+ .quad 0x3ffe45af3fe592e8
+ .quad 0x3ffe4aee9f495ddc
+ .quad 0x3ffe502ee78b3ff6
+ .quad 0x3ffe557018d3970b
+ .quad 0x3ffe5ab2334ac7ee
+ .quad 0x3ffe5ff537193e75
+ .quad 0x3ffe653924676d76
+ .quad 0x3ffe6a7dfb5dceca
+ .quad 0x3ffe6fc3bc24e350
+ .quad 0x3ffe750a66e532eb
+ .quad 0x3ffe7a51fbc74c83
+ .quad 0x3ffe7f9a7af3c60b
+ .quad 0x3ffe84e3e4933c7e
+ .quad 0x3ffe8a2e38ce53df
+ .quad 0x3ffe8f7977cdb740
+ .quad 0x3ffe94c5a1ba18bd
+ .quad 0x3ffe9a12b6bc3181
+ .quad 0x3ffe9f60b6fcc1c7
+ .quad 0x3ffea4afa2a490da
+ .quad 0x3ffea9ff79dc6d14
+ .quad 0x3ffeaf503ccd2be5
+ .quad 0x3ffeb4a1eb9fa9d1
+ .quad 0x3ffeb9f4867cca6e
+ .quad 0x3ffebf480d8d786d
+ .quad 0x3ffec49c80faa594
+ .quad 0x3ffec9f1e0ed4ac2
+ .quad 0x3ffecf482d8e67f1
+ .quad 0x3ffed49f67070435
+ .quad 0x3ffed9f78d802dc2
+ .quad 0x3ffedf50a122f9e6
+ .quad 0x3ffee4aaa2188510
+ .quad 0x3ffeea059089f2d0
+ .quad 0x3ffeef616ca06dd6
+ .quad 0x3ffef4be368527f6
+ .quad 0x3ffefa1bee615a27
+ .quad 0x3ffeff7a945e4487
+ .quad 0x3fff04da28a52e59
+ .quad 0x3fff0a3aab5f6609
+ .quad 0x3fff0f9c1cb6412a
+ .quad 0x3fff14fe7cd31c7b
+ .quad 0x3fff1a61cbdf5be7
+ .quad 0x3fff1fc60a046a84
+ .quad 0x3fff252b376bba97
+ .quad 0x3fff2a91543ec595
+ .quad 0x3fff2ff860a70c22
+ .quad 0x3fff35605cce1613
+ .quad 0x3fff3ac948dd7274
+ .quad 0x3fff403324feb781
+ .quad 0x3fff459df15b82ac
+ .quad 0x3fff4b09ae1d78a1
+ .quad 0x3fff50765b6e4540
+ .quad 0x3fff55e3f9779ba5
+ .quad 0x3fff5b5288633625
+ .quad 0x3fff60c2085ad652
+ .quad 0x3fff6632798844f8
+ .quad 0x3fff6ba3dc155226
+ .quad 0x3fff7116302bd526
+ .quad 0x3fff768975f5ac86
+ .quad 0x3fff7bfdad9cbe14
+ .quad 0x3fff8172d74af6e1
+ .quad 0x3fff86e8f32a4b45
+ .quad 0x3fff8c600164b6dc
+ .quad 0x3fff91d802243c89
+ .quad 0x3fff9750f592e677
+ .quad 0x3fff9ccadbdac61d
+ .quad 0x3fffa245b525f439
+ .quad 0x3fffa7c1819e90d8
+ .quad 0x3fffad3e416ec354
+ .quad 0x3fffb2bbf4c0ba54
+ .quad 0x3fffb83a9bbeabd1
+ .quad 0x3fffbdba3692d514
+ .quad 0x3fffc33ac5677ab8
+ .quad 0x3fffc8bc4866e8ad
+ .quad 0x3fffce3ebfbb7237
+ .quad 0x3fffd3c22b8f71f1
+ .quad 0x3fffd9468c0d49cc
+ .quad 0x3fffdecbe15f6314
+ .quad 0x3fffe4522bb02e6e
+ .quad 0x3fffe9d96b2a23d9
+ .quad 0x3fffef619ff7c2b3
+ .quad 0x3ffff4eaca4391b6
+ .quad 0x3ffffa74ea381efc
+
+/* Range reduction coefficients:
+ * log(2) inverted = 2^k/ln2 */
+double_vector __dbInvLn2 0x40971547652b82fe
+
+/* right-shifter value = 3*2^51 (= 1.5*2^52) */
+double_vector __dbShifter 0x4338000000000000
+
+/* log(2) high part = ln2/2^k (52-k-9 hibits) */
+double_vector __dbLn2hi 0x3f462e42fec00000
+
+/* log(2) low part = ln2/2^k (52-k-9..104-k-9 lobits) */
+double_vector __dbLn2lo 0x3d5d1cf79abc9e3b
+
+/* Polynomial coefficients (k=10, deg=3): */
+double_vector __dPC0 0x3ff0000000000000
+double_vector __dPC1 0x3fe0000001ebfbe0
+double_vector __dPC2 0x3fc5555555555556
+
+/* Other constants:
+ * index mask = 2^k-1 */
+double_vector __lIndexMask 0x00000000000003ff
+
+/* absolute value mask (SP) */
+float_vector __iAbsMask 0x7fffffff
+
+/* domain range (SP) (>=4086232B) */
+float_vector __iDomainRange 0x4086232a
+ .type __svml_dexp_data,@object
+ .size __svml_dexp_data,.-__svml_dexp_data
diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.h b/sysdeps/x86_64/fpu/svml_d_exp_data.h
new file mode 100644
index 0000000..71ebdb7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_exp_data.h
@@ -0,0 +1,52 @@
+/* Offsets for data table for function exp.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef D_EXP_DATA_H
+#define D_EXP_DATA_H
+
+#define __dbT 0 /* All values are byte offsets from __svml_dexp_data.  */
+#define __dbInvLn2 8192 /* 8192 bytes (presumably all __dbT) precede this.  */
+#define __dbShifter 8256 /* Entries from here on are spaced 64 bytes apart.  */
+#define __dbLn2hi 8320
+#define __dbLn2lo 8384
+#define __dPC0 8448
+#define __dPC1 8512
+#define __dPC2 8576
+#define __lIndexMask 8640
+#define __iAbsMask 8704 /* Checked by float_vector rather than double_vector.  */
+#define __iDomainRange 8768 /* Checked by float_vector rather than double_vector.  */
+
+.macro double_vector offset value /* Emit VALUE as 8 quads at OFFSET.  */
+.if .-__svml_dexp_data != \offset /* Position in table must equal OFFSET.  */
+.err /* Otherwise stop: data layout and offset define disagree.  */
+.endif
+.rept 8 /* 8 x 8-byte copies = 64 bytes.  */
+.quad \value
+.endr
+.endm
+
+.macro float_vector offset value /* Emit VALUE as 16 longs at OFFSET.  */
+.if .-__svml_dexp_data != \offset /* Position in table must equal OFFSET.  */
+.err /* Otherwise stop: data layout and offset define disagree.  */
+.endif
+.rept 16 /* 16 x 4-byte copies = 64 bytes.  */
+.long \value
+.endr
+.endm
+
+#endif
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
index dfbc3d3..946a8f6 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
@@ -25,3 +25,4 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVbN2v_log)
+VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVbN2v_exp)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2.c b/sysdeps/x86_64/fpu/test-double-vlen2.c
index a119bfc..1b72748 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen2.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen2.c
@@ -21,5 +21,6 @@
#define TEST_VECTOR_cos 1
#define TEST_VECTOR_sin 1
#define TEST_VECTOR_log 1
+#define TEST_VECTOR_exp 1
#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
index 6e01a89..40c3e25 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
@@ -28,3 +28,4 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVdN4v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVdN4v_sin)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVdN4v_log)
+VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVdN4v_exp)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c
index ef6e1c2..45d6ed6 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c
@@ -24,6 +24,7 @@
#define TEST_VECTOR_cos 1
#define TEST_VECTOR_sin 1
#define TEST_VECTOR_log 1
+#define TEST_VECTOR_exp 1
#define REQUIRE_AVX2
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
index e9f8905..094c9bf 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
@@ -25,3 +25,4 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVcN4v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVcN4v_sin)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVcN4v_log)
+VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVcN4v_exp)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4.c b/sysdeps/x86_64/fpu/test-double-vlen4.c
index 71ea85c..b89e77f 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4.c
@@ -21,5 +21,6 @@
#define TEST_VECTOR_cos 1
#define TEST_VECTOR_sin 1
#define TEST_VECTOR_log 1
+#define TEST_VECTOR_exp 1
#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
index 290d59c..0b4398a 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
@@ -25,3 +25,4 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVeN8v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVeN8v_sin)
VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVeN8v_log)
+VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVeN8v_exp)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8.c b/sysdeps/x86_64/fpu/test-double-vlen8.c
index e2f2cfe..277b312 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen8.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8.c
@@ -21,6 +21,7 @@
#define TEST_VECTOR_cos 1
#define TEST_VECTOR_sin 1
#define TEST_VECTOR_log 1
+#define TEST_VECTOR_exp 1
#define REQUIRE_AVX512F