aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc')
-rw-r--r--sysdeps/powerpc/powerpc64/le/power10/memcmp.S179
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/Makefile2
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c6
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/memcmp-power10.S26
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/memcmp.c6
5 files changed, 218 insertions, 1 deletions
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
new file mode 100644
index 0000000..52f244e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
@@ -0,0 +1,179 @@
+/* Optimized memcmp implementation for POWER10.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* TODO: Replace macros by the actual instructions when minimum binutils becomes
+ >= 2.35. This is used to keep compatibility with older versions. */
+#define VEXTRACTBM(rt,vrb) \
+ .long(((4)<<(32-6)) \
+ | ((rt)<<(32-11)) \
+ | ((8)<<(32-16)) \
+ | ((vrb)<<(32-21)) \
+ | 1602)
+
+#define LXVP(xtp,dq,ra) \
+ .long(((6)<<(32-6)) \
+ | ((((xtp)-32)>>1)<<(32-10)) \
+ | ((1)<<(32-11)) \
+ | ((ra)<<(32-16)) \
+ | dq)
+
+/* Compare 32 bytes. */
+#define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\
+ LXVP(32+vr1,offset,r3); \
+ LXVP(32+vr2,offset,r4); \
+ vcmpneb. v5,vr1+1,vr2+1; \
+ bne cr6,L(tail_2); \
+ vcmpneb. v4,vr1,vr2; \
+ bne cr6,L(tail_1); \
+
+#define TAIL(v_res,s1,s2) \
+ vctzlsbb r7,v_res; \
+ vextubrx r8,r7,s1; \
+ vextubrx r9,r7,s2; \
+ subf r3,r9,r8; \
+ blr; \
+
+/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4],
+ size_t size [r5]) */
+
+#ifndef MEMCMP
+# define MEMCMP memcmp
+#endif
+ .machine power9
+ENTRY_TOCLESS (MEMCMP, 4)
+ CALL_MCOUNT 3
+
+ cmpldi cr6,r5,64
+ bgt cr6,L(loop_head)
+
+/* Compare 64 bytes. This section is used for lengths <= 64 and for the last
+ bytes for larger lengths. */
+L(last_compare):
+ li r8,16
+
+ sldi r9,r5,56
+ sldi r8,r8,56
+ addi r6,r3,16
+ addi r7,r4,16
+
+ /* Align up to 16 bytes. */
+ lxvl 32+v0,r3,r9
+ lxvl 32+v2,r4,r9
+
+ /* The sub. and vcmpneb. results are concatenated by the crnand in order
+ to do a single branch. It's doing a NOT(CR0.GT AND CR6.EQ) then
+ loading to CR0.LT. That means r9 is not bigger than 0 and v4 is not
+ all equal to 0. */
+ sub. r9,r9,r8
+ vcmpneb. v4,v0,v2
+ crnand 4*cr0+lt,4*cr0+gt,4*cr6+eq
+ bt 4*cr0+lt,L(tail1)
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ lxvl 32+v1,r6,r9
+ lxvl 32+v3,r7,r9
+ sub. r9,r9,r8
+ vcmpneb. v5,v1,v3
+ crnand 4*cr0+lt,4*cr0+gt,4*cr6+eq
+ bt 4*cr0+lt,L(tail2)
+
+ addi r6,r3,16
+ addi r7,r4,16
+
+ lxvl 32+v6,r3,r9
+ lxvl 32+v8,r4,r9
+ sub. r9,r9,r8
+ vcmpneb. v4,v6,v8
+ crnand 4*cr0+lt,4*cr0+gt,4*cr6+eq
+ bt 4*cr0+lt,L(tail3)
+
+ lxvl 32+v7,r6,r9
+ lxvl 32+v9,r7,r9
+ vcmpneb. v5,v7,v9
+ bne cr6,L(tail4)
+
+L(finish):
+ /* The contents are equal. */
+ li r3,0
+ blr
+
+L(loop_head):
+ /* Calculate how many loops to run. */
+ srdi. r8,r5,7
+ beq L(loop_tail)
+ mtctr r8
+
+/* Main loop. Compares 128 bytes each loop. */
+ .p2align 5
+L(loop_128):
+ COMPARE_32(v0,v2,0,tail1,tail2)
+ COMPARE_32(v6,v8,32,tail3,tail4)
+ COMPARE_32(v10,v12,64,tail5,tail6)
+ COMPARE_32(v14,v16,96,tail7,tail8)
+
+ addi r3,r3,128
+ addi r4,r4,128
+ bdnz L(loop_128)
+
+ /* Account loop comparisons. */
+ clrldi. r5,r5,57
+ beq L(finish)
+
+/* Compares 64 bytes if length is still bigger than 64 bytes. */
+ .p2align 5
+L(loop_tail):
+ cmpldi r5,64
+ ble L(last_compare)
+ COMPARE_32(v0,v2,0,tail1,tail2)
+ COMPARE_32(v6,v8,32,tail3,tail4)
+ addi r3,r3,64
+ addi r4,r4,64
+ subi r5,r5,64
+ b L(last_compare)
+
+L(tail1):
+ TAIL(v4,v0,v2)
+
+L(tail2):
+ TAIL(v5,v1,v3)
+
+L(tail3):
+ TAIL(v4,v6,v8)
+
+L(tail4):
+ TAIL(v5,v7,v9)
+
+L(tail5):
+ TAIL(v4,v10,v12)
+
+L(tail6):
+ TAIL(v5,v11,v13)
+
+L(tail7):
+ TAIL(v4,v14,v16)
+
+L(tail8):
+ TAIL(v5,v15,v17)
+
+END (MEMCMP)
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp, bcmp)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index e6e013d..626845a 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -32,7 +32,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
strncase-power8
ifneq (,$(filter %le,$(config-machine)))
-sysdep_routines += memcpy-power10 memmove-power10 memset-power10 \
+sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \
rawmemchr-power9 rawmemchr-power10 \
strcmp-power9 strncmp-power9 strcpy-power9 stpcpy-power9 \
strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index a92b674..0acdf22 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -184,6 +184,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/powerpc/powerpc64/multiarch/memcmp.c. */
IFUNC_IMPL (i, name, memcmp,
+#ifdef __LITTLE_ENDIAN__
+ IFUNC_IMPL_ADD (array, i, memcmp,
+ hwcap2 & PPC_FEATURE2_ARCH_3_1
+ && hwcap & PPC_FEATURE_HAS_VSX,
+ __memcmp_power10)
+#endif
IFUNC_IMPL_ADD (array, i, memcmp, hwcap2 & PPC_FEATURE2_ARCH_2_07,
__memcmp_power8)
IFUNC_IMPL_ADD (array, i, memcmp, hwcap & PPC_FEATURE_HAS_VSX,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/memcmp-power10.S
new file mode 100644
index 0000000..73a0deb
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcmp-power10.S
@@ -0,0 +1,26 @@
+/* Optimized memcmp implementation for POWER10.
+ Copyright (C) 2017-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define MEMCMP __memcmp_power10
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+#undef weak_alias
+#define weak_alias(name,alias)
+
+#include <sysdeps/powerpc/powerpc64/le/power10/memcmp.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcmp.c b/sysdeps/powerpc/powerpc64/multiarch/memcmp.c
index 6fd5326..4fd089a 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memcmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcmp.c
@@ -27,11 +27,17 @@ extern __typeof (memcmp) __memcmp_ppc attribute_hidden;
extern __typeof (memcmp) __memcmp_power4 attribute_hidden;
extern __typeof (memcmp) __memcmp_power7 attribute_hidden;
extern __typeof (memcmp) __memcmp_power8 attribute_hidden;
+extern __typeof (memcmp) __memcmp_power10 attribute_hidden;
# undef memcmp
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
libc_ifunc_redirected (__redirect_memcmp, memcmp,
+#ifdef __LITTLE_ENDIAN__
+ (hwcap2 & PPC_FEATURE2_ARCH_3_1
+ && hwcap & PPC_FEATURE_HAS_VSX)
+ ? __memcmp_power10 :
+#endif
(hwcap2 & PPC_FEATURE2_ARCH_2_07)
? __memcmp_power8 :
(hwcap & PPC_FEATURE_HAS_VSX)