about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2017-10-22 08:11:15 -0700
committer: H.J. Lu <hjl.tools@gmail.com> 2017-10-22 08:12:41 -0700
commit: 5313581cb52fd5d3d2cf222ddb6f8f86f090974f (patch)
tree: e2ec5d944c1089cec1de29c5c72c3fb600a8c3fb
parent: 6089a3ee24cede17e9443aef0aa72fa1a0ba1548 (diff)
download: glibc-5313581cb52fd5d3d2cf222ddb6f8f86f090974f.zip
          glibc-5313581cb52fd5d3d2cf222ddb6f8f86f090974f.tar.gz
          glibc-5313581cb52fd5d3d2cf222ddb6f8f86f090974f.tar.bz2
i386: Replace assembly versions of e_powf with generic e_powf.c
This patch replaces i386 assembly versions of e_powf with generic
e_powf.c.

For workload-spec2017.wrf, on Nehalem, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   230.855   78.3358   194%
latency                 231.685   94.1259   146%

On Skylake, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   239.858   47.4713   405%
latency                 247.57    93.8798   163%

On IvyBridge with --disable-multi-arch, it improves performance by:

                        Before    After     Improvement
reciprocal-throughput   269.078   63.3758   324%
latency                 271.473   102.091   165%

	* sysdeps/i386/fpu/e_powf.S: Removed.
	* sysdeps/i386/fpu/e_powf_log2_data.c: Likewise.
	* sysdeps/i386/fpu/w_powf.c: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_powf.c.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_powf-sse2.
	(CFLAGS-e_powf-sse2.c): New.
	* sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c: New file.
	* sysdeps/i386/i686/fpu/multiarch/e_powf.c: Likewise.
-rw-r--r--  ChangeLog                                          13
-rw-r--r--  sysdeps/i386/fpu/e_powf.S                         392
-rw-r--r--  sysdeps/i386/fpu/e_powf_log2_data.c                 1
-rw-r--r--  sysdeps/i386/fpu/libm-test-ulps                     6
-rw-r--r--  sysdeps/i386/fpu/w_powf.c                           1
-rw-r--r--  sysdeps/i386/i686/fpu/multiarch/Makefile            3
-rw-r--r--  sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c       3
-rw-r--r--  sysdeps/i386/i686/fpu/multiarch/e_powf.c           43
-rw-r--r--  sysdeps/i386/i686/fpu/multiarch/libm-test-ulps     18
9 files changed, 79 insertions, 401 deletions
diff --git a/ChangeLog b/ChangeLog
index 78910c5..5d45da1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
+ * sysdeps/i386/fpu/e_powf.S: Removed.
+ * sysdeps/i386/fpu/e_powf_log2_data.c: Likewise.
+ * sysdeps/i386/fpu/w_powf.c: Likewise.
+ * sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_powf.c.
+ * sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
+ * sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
+ Add e_powf-sse2.
+ (CFLAGS-e_powf-sse2.c): New.
+ * sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c: New file.
+ * sysdeps/i386/i686/fpu/multiarch/e_powf.c: Likewise.
+
+2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
+
* sysdeps/i386/fpu/e_log2f.S: Removed.
* sysdeps/i386/fpu/e_log2f_data.c: Likewise.
* sysdeps/i386/fpu/w_log2f.c: Likewise.
diff --git a/sysdeps/i386/fpu/e_powf.S b/sysdeps/i386/fpu/e_powf.S
deleted file mode 100644
index 467ef23..0000000
--- a/sysdeps/i386/fpu/e_powf.S
+++ /dev/null
@@ -1,392 +0,0 @@
-/* ix87 specific implementation of pow function.
- Copyright (C) 1996-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <machine/asm.h>
-#include <i386-math-asm.h>
-
- .section .rodata.cst8,"aM",@progbits,8
-
- .p2align 3
- .type one,@object
-one: .double 1.0
- ASM_SIZE_DIRECTIVE(one)
- .type limit,@object
-limit: .double 0.29
- ASM_SIZE_DIRECTIVE(limit)
- .type p31,@object
-p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41
- ASM_SIZE_DIRECTIVE(p31)
-
- .section .rodata.cst16,"aM",@progbits,16
-
- .p2align 3
- .type infinity,@object
-inf_zero:
-infinity:
- .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
- ASM_SIZE_DIRECTIVE(infinity)
- .type zero,@object
-zero: .double 0.0
- ASM_SIZE_DIRECTIVE(zero)
- .type minf_mzero,@object
-minf_mzero:
-minfinity:
- .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
-mzero:
- .byte 0, 0, 0, 0, 0, 0, 0, 0x80
- ASM_SIZE_DIRECTIVE(minf_mzero)
-DEFINE_FLT_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%ecx)
-# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
-#else
-# define MO(op) op
-# define MOX(op,x,f) op(,x,f)
-#endif
-
- .text
-ENTRY(__ieee754_powf)
- flds 8(%esp) // y
- fxam
-
-#ifdef PIC
- LOAD_PIC_REG (cx)
-#endif
-
- fnstsw
- movb %ah, %dl
- andb $0x45, %ah
- cmpb $0x40, %ah // is y == 0 ?
- je 11f
-
- cmpb $0x05, %ah // is y == ±inf ?
- je 12f
-
- cmpb $0x01, %ah // is y == NaN ?
- je 30f
-
- flds 4(%esp) // x : y
-
- subl $4, %esp
- cfi_adjust_cfa_offset (4)
-
- fxam
- fnstsw
- movb %ah, %dh
- andb $0x45, %ah
- cmpb $0x40, %ah
- je 20f // x is ±0
-
- cmpb $0x05, %ah
- je 15f // x is ±inf
-
- cmpb $0x01, %ah
- je 33f // x is NaN
-
- fxch // y : x
-
- /* fistpl raises invalid exception for |y| >= 1L<<31. */
- fld %st // y : y : x
- fabs // |y| : y : x
- fcompl MO(p31) // y : x
- fnstsw
- sahf
- jnc 2f
-
- /* First see whether `y' is a natural number. In this case we
- can use a more precise algorithm. */
- fld %st // y : y : x
- fistpl (%esp) // y : x
- fildl (%esp) // int(y) : y : x
- fucomp %st(1) // y : x
- fnstsw
- sahf
- jne 3f
-
- /* OK, we have an integer value for y. */
- popl %edx
- cfi_adjust_cfa_offset (-4)
- orl $0, %edx
- fstp %st(0) // x
- jns 4f // y >= 0, jump
- fdivrl MO(one) // 1/x (now referred to as x)
- negl %edx
-4: fldl MO(one) // 1 : x
- fxch
-
- /* If y is even, take the absolute value of x. Otherwise,
- ensure all intermediate values that might overflow have the
- sign of x. */
- testb $1, %dl
- jnz 6f
- fabs
-
-6: shrl $1, %edx
- jnc 5f
- fxch
- fabs
- fmul %st(1) // x : ST*x
- fxch
-5: fld %st // x : x : ST*x
- fabs // |x| : x : ST*x
- fmulp // |x|*x : ST*x
- testl %edx, %edx
- jnz 6b
- fstp %st(0) // ST*x
- FLT_NARROW_EVAL_UFLOW_NONNAN
- ret
-
- /* y is ±NAN */
-30: flds 4(%esp) // x : y
- fldl MO(one) // 1.0 : x : y
- fucomp %st(1) // x : y
- fnstsw
- sahf
- je 31f
- fxch // y : x
-31: fstp %st(1)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
-2: /* y is a large integer (so even). */
- fxch // x : y
- fabs // |x| : y
- fxch // y : x
- .align ALIGNARG(4)
-3: /* y is a real number. */
- fxch // x : y
- fldl MO(one) // 1.0 : x : y
- fldl MO(limit) // 0.29 : 1.0 : x : y
- fld %st(2) // x : 0.29 : 1.0 : x : y
- fsub %st(2) // x-1 : 0.29 : 1.0 : x : y
- fabs // |x-1| : 0.29 : 1.0 : x : y
- fucompp // 1.0 : x : y
- fnstsw
- fxch // x : 1.0 : y
- sahf
- ja 7f
- fsub %st(1) // x-1 : 1.0 : y
- fyl2xp1 // log2(x) : y
- jmp 8f
-
-7: fyl2x // log2(x) : y
-8: fmul %st(1) // y*log2(x) : y
- fst %st(1) // y*log2(x) : y*log2(x)
- frndint // int(y*log2(x)) : y*log2(x)
- fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
- fxch // fract(y*log2(x)) : int(y*log2(x))
- f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
- faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
- fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
-32: addl $4, %esp
- cfi_adjust_cfa_offset (-4)
- fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
- FLT_NARROW_EVAL_UFLOW_NONNAN
- ret
-
- /* x is NaN. */
- cfi_adjust_cfa_offset (4)
-33: addl $4, %esp
- cfi_adjust_cfa_offset (-4)
- fstp %st(1)
- ret
-
- // pow(x,±0) = 1
- .align ALIGNARG(4)
-11: fstp %st(0) // pop y
- fldl MO(one)
- ret
-
- // y == ±inf
- .align ALIGNARG(4)
-12: fstp %st(0) // pop y
- fldl MO(one) // 1
- flds 4(%esp) // x : 1
- fabs // abs(x) : 1
- fucompp // < 1, == 1, or > 1
- fnstsw
- andb $0x45, %ah
- cmpb $0x45, %ah
- je 13f // jump if x is NaN
-
- cmpb $0x40, %ah
- je 14f // jump if |x| == 1
-
- shlb $1, %ah
- xorb %ah, %dl
- andl $2, %edx
- fldl MOX(inf_zero, %edx, 4)
- ret
-
- .align ALIGNARG(4)
-14: fldl MO(one)
- ret
-
- .align ALIGNARG(4)
-13: flds 4(%esp) // load x == NaN
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
- // x is ±inf
-15: fstp %st(0) // y
- testb $2, %dh
- jz 16f // jump if x == +inf
-
- // fistpl raises invalid exception for |y| >= 1L<<31, so test
- // that (in which case y is certainly even) before testing
- // whether y is odd.
- fld %st // y : y
- fabs // |y| : y
- fcompl MO(p31) // y
- fnstsw
- sahf
- jnc 16f
-
- // We must find out whether y is an odd integer.
- fld %st // y : y
- fistpl (%esp) // y
- fildl (%esp) // int(y) : y
- fucompp // <empty>
- fnstsw
- sahf
- jne 17f
-
- // OK, the value is an integer.
- popl %edx
- cfi_adjust_cfa_offset (-4)
- testb $1, %dl
- jz 18f // jump if not odd
- // It's an odd integer.
- shrl $31, %edx
- fldl MOX(minf_mzero, %edx, 8)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
-16: fcompl MO(zero)
- addl $4, %esp
- cfi_adjust_cfa_offset (-4)
- fnstsw
- shrl $5, %eax
- andl $8, %eax
- fldl MOX(inf_zero, %eax, 1)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
-17: shll $30, %edx // sign bit for y in right position
- addl $4, %esp
- cfi_adjust_cfa_offset (-4)
-18: shrl $31, %edx
- fldl MOX(inf_zero, %edx, 8)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
- // x is ±0
-20: fstp %st(0) // y
- testb $2, %dl
- jz 21f // y > 0
-
- // x is ±0 and y is < 0. We must find out whether y is an odd integer.
- testb $2, %dh
- jz 25f
-
- // fistpl raises invalid exception for |y| >= 1L<<31, so test
- // that (in which case y is certainly even) before testing
- // whether y is odd.
- fld %st // y : y
- fabs // |y| : y
- fcompl MO(p31) // y
- fnstsw
- sahf
- jnc 25f
-
- fld %st // y : y
- fistpl (%esp) // y
- fildl (%esp) // int(y) : y
- fucompp // <empty>
- fnstsw
- sahf
- jne 26f
-
- // OK, the value is an integer.
- popl %edx
- cfi_adjust_cfa_offset (-4)
- testb $1, %dl
- jz 27f // jump if not odd
- // It's an odd integer.
- // Raise divide-by-zero exception and get minus infinity value.
- fldl MO(one)
- fdivl MO(zero)
- fchs
- ret
-
- cfi_adjust_cfa_offset (4)
-25: fstp %st(0)
-26: addl $4, %esp
- cfi_adjust_cfa_offset (-4)
-27: // Raise divide-by-zero exception and get infinity value.
- fldl MO(one)
- fdivl MO(zero)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
- // x is ±0 and y is > 0. We must find out whether y is an odd integer.
-21: testb $2, %dh
- jz 22f
-
- // fistpl raises invalid exception for |y| >= 1L<<31, so test
- // that (in which case y is certainly even) before testing
- // whether y is odd.
- fcoml MO(p31) // y
- fnstsw
- sahf
- jnc 22f
-
- fld %st // y : y
- fistpl (%esp) // y
- fildl (%esp) // int(y) : y
- fucompp // <empty>
- fnstsw
- sahf
- jne 23f
-
- // OK, the value is an integer.
- popl %edx
- cfi_adjust_cfa_offset (-4)
- testb $1, %dl
- jz 24f // jump if not odd
- // It's an odd integer.
- fldl MO(mzero)
- ret
-
- cfi_adjust_cfa_offset (4)
-22: fstp %st(0)
-23: addl $4, %esp // Don't use pop.
- cfi_adjust_cfa_offset (-4)
-24: fldl MO(zero)
- ret
-
-END(__ieee754_powf)
-strong_alias (__ieee754_powf, __powf_finite)
diff --git a/sysdeps/i386/fpu/e_powf_log2_data.c b/sysdeps/i386/fpu/e_powf_log2_data.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/i386/fpu/e_powf_log2_data.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed. */
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
index 64cac56..3ab3fd8 100644
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@@ -2370,24 +2370,30 @@ ldouble: 1
Function: "pow_downward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_towardzero":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_upward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
diff --git a/sysdeps/i386/fpu/w_powf.c b/sysdeps/i386/fpu/w_powf.c
deleted file mode 100644
index d133216f..0000000
--- a/sysdeps/i386/fpu/w_powf.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/../math/w_powf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/Makefile b/sysdeps/i386/i686/fpu/multiarch/Makefile
index eee3b8b..c0fa976 100644
--- a/sysdeps/i386/i686/fpu/multiarch/Makefile
+++ b/sysdeps/i386/i686/fpu/multiarch/Makefile
@@ -1,9 +1,10 @@
ifeq ($(subdir),math)
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 e_log2f-sse2 \
- s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
+ e_powf-sse2 s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_log2f-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
+CFLAGS-e_powf-sse2.c = -msse2 -mfpmath=sse
endif
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c b/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c
new file mode 100644
index 0000000..c56f6ee
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c
@@ -0,0 +1,3 @@
+#define __powf __powf_sse2
+
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_powf.c b/sysdeps/i386/i686/fpu/multiarch/e_powf.c
new file mode 100644
index 0000000..4dc4c87
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/multiarch/e_powf.c
@@ -0,0 +1,43 @@
+/* Multiple versions of powf.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define powf __redirect_powf
+#define __DECL_SIMD___redirect_powf
+#include <math.h>
+#undef powf
+
+#define SYMBOL_NAME powf
+#include "ifunc-sse2.h"
+
+libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__powf_ia32, __GI___powf, __redirect_powf)
+ __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __powf, powf, GLIBC_2_27);
+#else
+weak_alias (__powf, powf)
+#endif
+
+strong_alias (__powf, __ieee754_powf)
+strong_alias (__powf, __powf_finite)
+
+#define __powf __powf_ia32
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
index b5d74df..26d90ec 100644
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
+++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
@@ -2370,24 +2370,30 @@ ldouble: 1
Function: "pow_downward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_towardzero":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_upward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
@@ -2577,30 +2583,30 @@ ldouble: 5
Function: "tgamma_downward":
double: 3
-float: 4
+float: 5
float128: 5
idouble: 3
-ifloat: 4
+ifloat: 5
ifloat128: 5
ildouble: 5
ldouble: 5
Function: "tgamma_towardzero":
double: 4
-float: 4
+float: 5
float128: 5
idouble: 4
-ifloat: 4
+ifloat: 5
ifloat128: 5
ildouble: 5
ldouble: 5
Function: "tgamma_upward":
double: 4
-float: 4
+float: 6
float128: 4
idouble: 4
-ifloat: 4
+ifloat: 6
ifloat128: 4
ildouble: 5
ldouble: 5