powerpc: refactor logb{f,l}

The power7 logb implementation does not show a performance gain on ISA 2.07+ chips with faster floating-point to GPR instructions (currently POWER8 and POWER9). This patch moves the POWER7 implementation to the generic one and enables it for POWER7. It also adds some cleanup to use inline floating-point constants instead of defining them as static const. The performance difference on POWER9 is: - Without patch: "logb": { "subnormal": { "duration": 4.99202e+09, "iterations": 8.83662e+08, "max": 75.194, "min": 5.501, "mean": 5.64925 }, "normal": { "duration": 4.97063e+09, "iterations": 9.97094e+08, "max": 46.489, "min": 4.956, "mean": 4.98512 } } - With patch: "logb": { "subnormal": { "duration": 4.97226e+09, "iterations": 9.92036e+08, "max": 77.209, "min": 4.892, "mean": 5.01218 }, "normal": { "duration": 4.96192e+09, "iterations": 1.07545e+09, "max": 12.361, "min": 4.593, "mean": 4.61382 } } The ifunc implementation is also enabled only for powerpc64. Checked on powerpc-linux-gnu (built without --with-cpu, with --with-cpu=power4 and with --with-cpu=power5+ and --disable-multi-arch), powerpc64-linux-gnu (built without --with-cpu and with --with-cpu=power5+ and --disable-multi-arch). * sysdeps/powerpc/power7/fpu/s_logb.c: Move to ... * sysdeps/powerpc/fpu/s_logb.c: ... here. Use inline FP constants. * sysdeps/powerpc/power7/fpu/s_logbf.c: Move to ... * sysdeps/powerpc/fpu/s_logbf.c: ... here. Use inline FP constants. * sysdeps/powerpc/power7/fpu/s_logbl.c: Move to ... * sysdeps/powerpc/fpu/s_logbl.c: ... here. Use inline FP constants. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c: Adjust implementation path. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c: Adjust implementation path. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c: Adjust implementation path. * sysdeps/powerpc/powerpc64/be/fpu/multiarch/Makefile (libm-sysdep_routines): Add s_log* objects. 
(CFLAGS-s_logbf-power7.c, CFLAGS-s_logbl-power7.c, CFLAGS-s_logb-power7.c): New rule. * sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c: Move to ... * sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_logb-power7.c: ... here. * sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c: Move to ... * sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_logb-ppc64.c: ... here. * sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c: Move to ... * sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_logb.c: ... here. * sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c: Move to ... * sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_logbf-power7.c: ... here. * sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c: Move to ... * sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_logbf-ppc64.c: ... here. * sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c: Move to ... * sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_logbf.c: ... here. * sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c: Move to ... * sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_logbl-power7.c: ... here. * sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c: Move to ... * sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_logbl-ppc64.c: ... here. * sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c: Move to ... * sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_logbl.c: ... here. * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile: Remove file. * sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c: Remove file. * sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c: Likewise. * sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c: Likewise. Reviewed-by: Gabriel F. T. Gomes <gabrielftg@linux.ibm.com>
author: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2019-03-19 12:22:21 +0000
committer: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2019-07-08 17:22:22 -0300
commit: 6ea21bfe439159f9e480c580927902c6461ec68c (patch)
tree: 5c48b477141f978c0c1712d6c27d69928a3d1145 /sysdeps/powerpc/fpu
parent: 105f2ed368ed61785a0a63ddb47fc62b89b58e0d (diff)
download: glibc-6ea21bfe439159f9e480c580927902c6461ec68c.zip
glibc-6ea21bfe439159f9e480c580927902c6461ec68c.tar.gz
glibc-6ea21bfe439159f9e480c580927902c6461ec68c.tar.bz2
3 files changed, 198 insertions, 0 deletions
diff --git a/sysdeps/powerpc/fpu/s_logb.c b/sysdeps/powerpc/fpu/s_logb.c
new file mode 100644
index 0000000..364f7f0
--- /dev/null
+++ b/sysdeps/powerpc/fpu/s_logb.c
@@ -0,0 +1,64 @@
+/* Get exponent of a floating-point value.  PowerPC version.
+   Copyright (C) 2012-2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* ISA 2.07 provides fast FP to GPR move instructions (mfvsr{d,wz}), which
+   make the generic implementation faster.  */
+#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR7)
+# include <sysdeps/ieee754/dbl-64/s_logb.c>
+#else
+# include <math.h>
+# include <math_private.h>
+# include <math_ldbl_opt.h>
+# include <libm-alias-double.h>
+
+/* Return the exponent of X.  This implementation avoids FP to INT
+   conversions by using VSX bitwise instructions over FP values.  */
+double
+__logb (double x)
+{
+  double ret;
+
+  if (__glibc_unlikely (x == 0.0))
+    /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF].  */
+    return -1.0 / fabs (x);
+
+  /* AND X with the exponent mask, then convert that integer to double.  */
+  asm ("xxland %x0,%x1,%x2\n"
+       "fcfid  %0,%0"
+       : "=d" (ret)
+       : "d" (x), "d" (0x7ff0000000000000ULL));
+  ret = (ret * 0x1p-52) - 1023.0;  /* Scale by 2^-52, then unbias.  */
+  if (ret > 1023.0)
+    /* X is Inf or NaN; X * X gives logb (+-Inf) = +Inf and keeps NaN.  */
+    return (x * x);
+  else if (ret == -1023.0)
+    {
+      /* Subnormal: POSIX treats it as though it were normalized, so count
+         the leading zeros past the 12 sign and exponent bits.  */
+      int64_t ix;
+      EXTRACT_WORDS64 (ix, x);
+      ix &= UINT64_C (0x7fffffffffffffff);
+      return (double) (-1023 - (__builtin_clzll (ix) - 12));
+    }
+  /* When rounding downward the subtraction above yields -0.0, not 0.0.  */
+  return ret == -0.0 ? 0.0 : ret;
+}
+# ifndef __logb
+libm_alias_double (__logb, logb)
+# endif
+#endif
diff --git a/sysdeps/powerpc/fpu/s_logbf.c b/sysdeps/powerpc/fpu/s_logbf.c
new file mode 100644
index 0000000..cf564ad
--- /dev/null
+++ b/sysdeps/powerpc/fpu/s_logbf.c
@@ -0,0 +1,56 @@
+/* Get exponent of a floating-point value.  PowerPC version.
+   Copyright (C) 2012-2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* ISA 2.07 provides fast FP to GPR move instructions (mfvsr{d,wz}), which
+   make the generic implementation faster.  */
+#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR7)
+# include <sysdeps/ieee754/flt-32/s_logbf.c>
+#else
+# include <math.h>
+# include <libm-alias-float.h>
+/* Return the exponent of X.  This implementation avoids FP to INT
+   conversions by using VSX bitwise instructions over FP values.  */
+float
+__logbf (float x)
+{
+  /* VSX operations are all done internally as double.  */
+  double ret;
+
+  if (__glibc_unlikely (x == 0.0))
+    /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF].  */
+    return -1.0 / fabs (x);
+
+  /* AND X with the exponent mask, then convert that integer to double.  */
+  asm ("xxland %x0,%x1,%x2\n"
+       "fcfid  %0,%0"
+       : "=d"(ret)
+       : "d" (x), "d" (0x7ff0000000000000ULL));
+  /* Same as (ret >> 52) - 1023 on the integer, but computed in double.  */
+  ret = (ret * 0x1p-52) - 1023.0;
+  if (ret > 127.0)
+    /* X is Inf or NaN; X * X gives logb (+-Inf) = +Inf and keeps NaN.  */
+    return (x * x);
+  /* Since operations are done with double we don't need additional
+     tests for subnormal numbers.  The test below is needed because when
+     rounding downward the subtraction above yields -0.0, not 0.0.  */
+  return ret == -0.0 ? 0.0 : ret;
+}
+# ifndef __logbf
+libm_alias_float (__logb, logb)
+# endif
+#endif
diff --git a/sysdeps/powerpc/fpu/s_logbl.c b/sysdeps/powerpc/fpu/s_logbl.c
new file mode 100644
index 0000000..543cd4a
--- /dev/null
+++ b/sysdeps/powerpc/fpu/s_logbl.c
@@ -0,0 +1,78 @@
+/* Get exponent of a floating-point value.  PowerPC version.
+   Copyright (C) 2012-2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* ISA 2.07 provides fast FP to GPR move instructions (mfvsr{d,wz}), which
+   make the generic implementation faster.  */
+#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR7)
+# include <./sysdeps/ieee754/ldbl-128ibm/s_logbl.c>
+#else
+# include <math.h>
+# include <math_private.h>
+# include <math_ldbl_opt.h>
+
+/* Return the exponent of X.  This implementation avoids FP to INT
+   conversions by using VSX bitwise instructions over FP values.  */
+long double
+__logbl (long double x)
+{
+  double xh, xl;
+  double ret;
+  int64_t hx;
+
+  if (__glibc_unlikely (x == 0.0))
+    /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF].  */
+    return -1.0L / __builtin_fabsl (x);
+
+  ldbl_unpack (x, &xh, &xl);
+  EXTRACT_WORDS64 (hx, xh);
+
+  /* AND XH with the exponent mask, then convert that integer to double.  */
+  asm ("xxland %x0,%x1,%x2\n"
+       "fcfid  %0,%0"
+       : "=d" (ret)
+       : "d" (xh), "d" (0x7ff0000000000000ULL));
+  ret = (ret * 0x1p-52) - 1023.0;  /* Scale by 2^-52, then unbias.  */
+  if (ret > 1023.0)
+    /* XH is Inf or NaN; XH * XH gives logb (+-Inf) = +Inf and keeps NaN.  */
+    return (xh * xh);
+  else if (ret == -1023.0)
+    {
+      /* Subnormal: POSIX treats it as though it were normalized, so count
+         the leading zeros past the 12 sign and exponent bits.  */
+      return (long double) (- (__builtin_clzll (hx & 0x7fffffffffffffffLL) \
+			       - 12) - 1023);
+    }
+  else if ((hx & 0x000fffffffffffffLL) == 0)
+    {
+      /* If the high part is a power of 2, and the low part is nonzero
+	 with the opposite sign, the low part lowers the exponent
+	 by one.  */
+      int64_t lx, rhx;
+      EXTRACT_WORDS64 (lx, xl);
+      rhx = (hx & 0x7ff0000000000000LL) >> 52;
+      if ((hx ^ lx) < 0 && (lx & 0x7fffffffffffffffLL) != 0)
+	rhx--;
+      return (long double) (rhx - 1023);
+    }
+  /* When rounding downward the subtraction above yields -0.0, not 0.0.  */
+  return ret == -0.0 ? 0.0 : ret;
+}
+# ifndef __logbl
+long_double_symbol (libm, __logbl, logbl);
+# endif
+#endif
author	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2019-03-19 12:22:21 +0000
committer	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2019-07-08 17:22:22 -0300
commit	6ea21bfe439159f9e480c580927902c6461ec68c (patch)
tree	5c48b477141f978c0c1712d6c27d69928a3d1145 /sysdeps/powerpc/fpu
parent	105f2ed368ed61785a0a63ddb47fc62b89b58e0d (diff)
download	glibc-6ea21bfe439159f9e480c580927902c6461ec68c.zip glibc-6ea21bfe439159f9e480c580927902c6461ec68c.tar.gz glibc-6ea21bfe439159f9e480c580927902c6461ec68c.tar.bz2