aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog24
-rw-r--r--manual/tunables.texi10
-rw-r--r--sysdeps/powerpc/cpu-features.c39
-rw-r--r--sysdeps/powerpc/cpu-features.h28
-rw-r--r--sysdeps/powerpc/dl-procinfo.c16
-rw-r--r--sysdeps/powerpc/dl-tunables.list28
-rw-r--r--sysdeps/powerpc/ldsodefs.h1
-rw-r--r--sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h2
-rw-r--r--sysdeps/powerpc/powerpc64/dl-machine.h4
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/Makefile4
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c2
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S176
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/memcpy.c23
13 files changed, 344 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 50da031..d5f7256 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,27 @@
+2017-12-11 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
+ Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
+
+ * manual/tunables.texi (Hardware Capability Tunables): Document
+ glibc.tune.cached_memopt.
+ * sysdeps/powerpc/cpu-features.c: New file.
+ * sysdeps/powerpc/cpu-features.h: New file.
+ * sysdeps/powerpc/dl-procinfo.c [!IS_IN(ldconfig)]: Add
+ _dl_powerpc_cpu_features.
+ * sysdeps/powerpc/dl-tunables.list: New file.
+ * sysdeps/powerpc/ldsodefs.h: Include cpu-features.h.
+ * sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h
+ (INIT_ARCH): Initialize use_cached_memopt.
+ * sysdeps/powerpc/powerpc64/dl-machine.h [defined(SHARED) &&
+ IS_IN(rtld)]: Restrict dl_platform_init availability and
+ initialize CPU features used by tunables.
+ * sysdeps/powerpc/powerpc64/multiarch/Makefile (sysdep_routines):
+ Add memcpy-power8-cached.
+ * sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c: Add
+ __memcpy_power8_cached.
+ * sysdeps/powerpc/powerpc64/multiarch/memcpy.c: Likewise.
+ * sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S:
+ New file.
+
2017-12-11 H.J. Lu <hongjiu.lu@intel.com>
* string/Makefile (CFLAGS-inl-tester.c): Replace = with +=.
diff --git a/manual/tunables.texi b/manual/tunables.texi
index e851b95..6e0ee28 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -319,6 +319,16 @@ the ones in @code{sysdeps/x86/cpu-features.h}.
This tunable is specific to i386 and x86-64.
@end deftp
+@deftp Tunable glibc.tune.cached_memopt
+The @code{glibc.tune.cached_memopt=[0|1]} tunable allows the user to
+enable optimizations recommended for cacheable memory. If set to
+@code{1}, @theglibc{} assumes that the process memory image consists
+of cacheable (non-device) memory only. The default, @code{0},
+indicates that the process may use device memory.
+
+This tunable is specific to powerpc, powerpc64 and powerpc64le.
+@end deftp
+
@deftp Tunable glibc.tune.cpu
The @code{glibc.tune.cpu=xxx} tunable allows the user to tell @theglibc{} to
assume that the CPU is @code{xxx} where xxx may have one of these values:
diff --git a/sysdeps/powerpc/cpu-features.c b/sysdeps/powerpc/cpu-features.c
new file mode 100644
index 0000000..6870582
--- /dev/null
+++ b/sysdeps/powerpc/cpu-features.c
@@ -0,0 +1,39 @@
+/* Initialize cpu feature data. PowerPC version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+#include <cpu-features.h>
+
+#if HAVE_TUNABLES
+# include <elf/dl-tunables.h>
+#endif
+
+static inline void
+init_cpu_features (struct cpu_features *cpu_features)
+{
+ /* Cached-memory optimizations are off by default. They are enabled only
+ when the glibc.tune.cached_memopt tunable is greater than zero, so they
+ stay off when tunables support is not compiled in. */
+#if HAVE_TUNABLES
+ int32_t cached_memfunc = TUNABLE_GET (glibc, tune, cached_memopt, int32_t,
+ NULL);
+ cpu_features->use_cached_memopt = (cached_memfunc > 0);
+#else
+ cpu_features->use_cached_memopt = false;
+#endif
+}
diff --git a/sysdeps/powerpc/cpu-features.h b/sysdeps/powerpc/cpu-features.h
new file mode 100644
index 0000000..36a8bb4
--- /dev/null
+++ b/sysdeps/powerpc/cpu-features.h
@@ -0,0 +1,28 @@
+/* Initialize cpu feature data. PowerPC version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __CPU_FEATURES_POWERPC_H
+# define __CPU_FEATURES_POWERPC_H
+
+#include <stdbool.h>
+
+struct cpu_features
+{
+ bool use_cached_memopt; /* True only if glibc.tune.cached_memopt > 0. */
+};
+
+#endif /* __CPU_FEATURES_POWERPC_H */
diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
index 55a6e78..c8b14454d 100644
--- a/sysdeps/powerpc/dl-procinfo.c
+++ b/sysdeps/powerpc/dl-procinfo.c
@@ -42,6 +42,22 @@
# define PROCINFO_CLASS
#endif
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+ ._dl_powerpc_cpu_features
+# else
+PROCINFO_CLASS struct cpu_features _dl_powerpc_cpu_features
+# endif
+# ifndef PROCINFO_DECL
+= { }
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
#if !defined PROCINFO_DECL && defined SHARED
._dl_powerpc_cap_flags
#else
diff --git a/sysdeps/powerpc/dl-tunables.list b/sysdeps/powerpc/dl-tunables.list
new file mode 100644
index 0000000..9e14b9a
--- /dev/null
+++ b/sysdeps/powerpc/dl-tunables.list
@@ -0,0 +1,28 @@
+# powerpc specific tunables.
+# Copyright (C) 2017 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+glibc {
+ tune {
+ cached_memopt {
+ type: INT_32
+ minval: 0
+ maxval: 1
+ default: 0
+ }
+ }
+}
diff --git a/sysdeps/powerpc/ldsodefs.h b/sysdeps/powerpc/ldsodefs.h
index 466de79..6f8b3a2 100644
--- a/sysdeps/powerpc/ldsodefs.h
+++ b/sysdeps/powerpc/ldsodefs.h
@@ -20,6 +20,7 @@
#define _POWERPC_LDSODEFS_H 1
#include <elf.h>
+#include <cpu-features.h>
struct La_ppc32_regs;
struct La_ppc32_retval;
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h b/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h
index f2e6a4b..6038941 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h
+++ b/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h
@@ -37,6 +37,8 @@
#define INIT_ARCH() \
unsigned long int hwcap = __GLRO(dl_hwcap); \
unsigned long int __attribute__((unused)) hwcap2 = __GLRO(dl_hwcap2); \
+ bool __attribute__((unused)) use_cached_memopt = \
+ GLRO(dl_powerpc_cpu_features).use_cached_memopt; \
if (hwcap & PPC_FEATURE_ARCH_2_06) \
hwcap |= PPC_FEATURE_ARCH_2_05 | \
PPC_FEATURE_POWER5_PLUS | \
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index aeb91b8..76dceee 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -27,6 +27,7 @@
#include <dl-tls.h>
#include <sysdep.h>
#include <hwcapinfo.h>
+#include <cpu-features.c>
/* Translate a processor specific dynamic tag to the index
in l_info array. */
@@ -300,13 +301,14 @@ BODY_PREFIX "_dl_start_user:\n" \
/* We define an initialization function to initialize HWCAP/HWCAP2 and
platform data so it can be copied into the TCB later. This is called
very early in _dl_sysdep_start for dynamically linked binaries. */
-#ifdef SHARED
+#if defined(SHARED) && IS_IN (rtld)
# define DL_PLATFORM_INIT dl_platform_init ()
static inline void __attribute__ ((unused))
dl_platform_init (void)
{
__tcb_parse_hwcap_and_convert_at_platform ();
+ init_cpu_features (&GLRO(dl_powerpc_cpu_features));
}
#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index dea49ac..4df6b45 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -1,6 +1,6 @@
ifeq ($(subdir),string)
-sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
- memcpy-power4 memcpy-ppc64 \
+sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
+ memcpy-cell memcpy-power4 memcpy-ppc64 \
memcmp-power8 memcmp-power7 memcmp-power4 memcmp-ppc64 \
memset-power7 memset-power6 memset-power4 \
memset-ppc64 memset-power8 \
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 6a88536..77a60ea 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -51,6 +51,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#ifdef SHARED
/* Support sysdeps/powerpc/powerpc64/multiarch/memcpy.c. */
IFUNC_IMPL (i, name, memcpy,
+ IFUNC_IMPL_ADD (array, i, memcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07,
+ __memcpy_power8_cached)
IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX,
__memcpy_power7)
IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S
new file mode 100644
index 0000000..e8bea91
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power8-cached.S
@@ -0,0 +1,176 @@
+/* Optimized memcpy implementation for cached memory on PowerPC64/POWER8.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'. Lengths >= 16 use possibly unaligned lxvd2x/stxvd2x. */
+
+ .machine power8
+ENTRY_TOCLESS (__memcpy_power8_cached, 5)
+ CALL_MCOUNT 3
+
+ cmpldi cr7,r5,15
+ bgt cr7,L(ge_16)
+ andi. r9,r5,0x1 /* len < 16: copy 1, 2, 4, 8 bytes per the bits of len. */
+ mr r9,r3 /* r9 = running dst; r3 is left untouched as the return value. */
+ beq cr0,1f
+ lbz r10,0(r4)
+ addi r9,r3,1
+ addi r4,r4,1
+ stb r10,0(r3)
+1:
+ andi. r10,r5,0x2
+ beq cr0,2f
+ lhz r10,0(r4)
+ addi r9,r9,2
+ addi r4,r4,2
+ sth r10,-2(r9)
+2:
+ andi. r10,r5,0x4
+ beq cr0,3f
+ lwz r10,0(r4)
+ addi r9,r9,4
+ addi r4,r4,4
+ stw r10,-4(r9)
+3:
+ andi. r10,r5,0x8
+ beqlr cr0 /* Done unless (len & 8). */
+ ld r10,0(r4)
+ std r10,0(r9)
+ blr
+
+ .align 4
+L(ge_16):
+ cmpldi cr7,r5,32
+ ble cr7,L(ge_16_le_32)
+ cmpldi cr7,r5,64
+ ble cr7,L(gt_32_le_64)
+
+ /* Align dst to 16 bytes. */
+ andi. r9,r3,0xf
+ mr r12,r3
+ beq cr0,L(dst_is_align_16)
+ lxvd2x v0,0,r4
+ subfic r12,r9,16 /* r12 = bytes up to the next 16-byte boundary of dst. */
+ subf r5,r12,r5
+ add r4,r4,r12
+ add r12,r3,r12
+ stxvd2x v0,0,r3 /* Head: one 16-byte store at the original dst. */
+L(dst_is_align_16):
+ cmpldi cr7,r5,127
+ ble cr7,L(tail_copy)
+ mr r9,r12
+ srdi r10,r5,7 /* Number of 128-byte iterations. */
+ li r11,16
+ li r6,32
+ li r7,48
+ mtctr r10
+ clrrdi r0,r5,7 /* Bytes that the loop will copy. */
+
+ /* Main loop, copy 128 bytes each time. */
+ .align 4
+L(copy_128):
+ lxvd2x v10,0,r4
+ lxvd2x v11,r4,r11
+ addi r8,r4,64
+ addi r10,r9,64 /* r8/r10 = second 64-byte half of src/dst. */
+ lxvd2x v12,r4,r6
+ lxvd2x v0,r4,r7
+ addi r4,r4,128
+ stxvd2x v10,0,r9
+ stxvd2x v11,r9,r11
+ stxvd2x v12,r9,r6
+ stxvd2x v0,r9,r7
+ addi r9,r9,128
+ lxvd2x v10,0,r8
+ lxvd2x v11,r8,r11
+ lxvd2x v12,r8,r6
+ lxvd2x v0,r8,r7
+ stxvd2x v10,0,r10
+ stxvd2x v11,r10,r11
+ stxvd2x v12,r10,r6
+ stxvd2x v0,r10,r7
+ bdnz L(copy_128)
+
+ add r12,r12,r0 /* Advance dst past the bytes copied by the loop. */
+ rldicl r5,r5,0,57 /* len %= 128. */
+L(tail_copy):
+ cmpldi cr7,r5,63
+ ble cr7,L(tail_le_64)
+ li r8,16
+ li r10,32
+ lxvd2x v10,0,r4
+ li r9,48
+ addi r5,r5,-64
+ lxvd2x v11,r4,r8
+ lxvd2x v12,r4,r10
+ lxvd2x v0,r4,r9
+ addi r4,r4,64
+ stxvd2x v10,0,r12
+ stxvd2x v11,r12,r8
+ stxvd2x v12,r12,r10
+ stxvd2x v0,r12,r9
+ addi r12,r12,64
+
+L(tail_le_64):
+ cmpldi cr7,r5,32
+ bgt cr7,L(tail_gt_32_le_64)
+ cmpdi cr7,r5,0
+ beqlr cr7
+ addi r5,r5,-32 /* Offset <= 0: copy the 32 bytes that end at src + len. */
+ li r9,16
+ add r8,r4,r5
+ add r10,r12,r5
+ lxvd2x v12,r4,r5
+ lxvd2x v0,r8,r9
+ stxvd2x v12,r12,r5
+ stxvd2x v0,r10,r9
+ blr
+
+ .align 4
+L(ge_16_le_32):
+ addi r5,r5,-16 /* Copy the first and the last 16 bytes; they may overlap. */
+ lxvd2x v0,0,r4
+ lxvd2x v1,r4,r5
+ stxvd2x v0,0,r3
+ stxvd2x v1,r3,r5
+ blr
+
+ .align 4
+L(gt_32_le_64):
+ mr r12,r3
+
+ .align 4
+L(tail_gt_32_le_64):
+ li r9,16
+ lxvd2x v0,0,r4
+ addi r5,r5,-32 /* Copy the first and the last 32 bytes; they may overlap. */
+ lxvd2x v1,r4,r9
+ add r8,r4,r5
+ lxvd2x v2,r4,r5
+ add r10,r12,r5
+ lxvd2x v3,r8,r9
+ stxvd2x v0,0,r12
+ stxvd2x v1,r12,r9
+ stxvd2x v2,r12,r5
+ stxvd2x v3,r10,r9
+ blr
+
+END_GEN_TB (__memcpy_power8_cached,TB_TOCLESS)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
index 9f4286c..fb49fe1 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
@@ -35,18 +35,21 @@ extern __typeof (__redirect_memcpy) __memcpy_cell attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_a2 attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_power7 attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_power8_cached attribute_hidden;
libc_ifunc (__libc_memcpy,
- (hwcap & PPC_FEATURE_HAS_VSX)
- ? __memcpy_power7 :
- (hwcap & PPC_FEATURE_ARCH_2_06)
- ? __memcpy_a2 :
- (hwcap & PPC_FEATURE_ARCH_2_05)
- ? __memcpy_power6 :
- (hwcap & PPC_FEATURE_CELL_BE)
- ? __memcpy_cell :
- (hwcap & PPC_FEATURE_POWER4)
- ? __memcpy_power4
+ ((hwcap2 & PPC_FEATURE2_ARCH_2_07) && use_cached_memopt)
+ ? __memcpy_power8_cached : /* Opt-in via glibc.tune.cached_memopt. */
+ (hwcap & PPC_FEATURE_HAS_VSX)
+ ? __memcpy_power7 :
+ (hwcap & PPC_FEATURE_ARCH_2_06)
+ ? __memcpy_a2 :
+ (hwcap & PPC_FEATURE_ARCH_2_05)
+ ? __memcpy_power6 :
+ (hwcap & PPC_FEATURE_CELL_BE)
+ ? __memcpy_cell :
+ (hwcap & PPC_FEATURE_POWER4)
+ ? __memcpy_power4
: __memcpy_ppc);
#undef memcpy