aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordengjianbo <dengjianbo@loongson.cn>2023-09-13 15:35:00 +0800
committercaiyinyu <caiyinyu@loongson.cn>2023-09-15 09:07:47 +0800
commit24279aecf34a830a744038cb922d94b5a2d9c1cb (patch)
tree8412b585df3f8206103d1e9db8133a1de1043e54
parent06251002d4a1d92101ae7205be380887a4129471 (diff)
downloadglibc-24279aecf34a830a744038cb922d94b5a2d9c1cb.zip
glibc-24279aecf34a830a744038cb922d94b5a2d9c1cb.tar.gz
glibc-24279aecf34a830a744038cb922d94b5a2d9c1cb.tar.bz2
LoongArch: Add ifunc support for strrchr{aligned, lsx, lasx}
According to glibc strrchr microbenchmark test results, this implementation could reduce the runtime time as following: Name Percent of rutime reduced strrchr-lasx 10%-50% strrchr-lsx 0%-50% strrchr-aligned 5%-50% Generic strrchr is implemented by function strlen + memrchr, the lasx version will compare with generic strrchr implemented by strlen-lasx + memrchr-lasx, the lsx version will compare with generic strrchr implemented by strlen-lsx + memrchr-lsx, the aligned version will compare with generic strrchr implemented by strlen-aligned + memrchr-generic.
-rw-r--r--sysdeps/loongarch/lp64/multiarch/Makefile3
-rw-r--r--sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c8
-rw-r--r--sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h41
-rw-r--r--sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S170
-rw-r--r--sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S176
-rw-r--r--sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S144
-rw-r--r--sysdeps/loongarch/lp64/multiarch/strrchr.c36
7 files changed, 578 insertions, 0 deletions
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 39550be..fe863e1 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -9,6 +9,9 @@ sysdep_routines += \
strchr-aligned \
strchr-lsx \
strchr-lasx \
+ strrchr-aligned \
+ strrchr-lsx \
+ strrchr-lasx \
strchrnul-aligned \
strchrnul-lsx \
strchrnul-lasx \
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 39a14f1..529e236 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -94,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned)
)
+ IFUNC_IMPL (i, name, strrchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx)
+ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned)
+ )
+
IFUNC_IMPL (i, name, memcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
new file mode 100644
index 0000000..bbb3408
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h
@@ -0,0 +1,41 @@
+/* Common definition for strrchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
new file mode 100644
index 0000000..a73deb7
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S
@@ -0,0 +1,170 @@
+/* Optimized strrchr implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STRRCHR __strrchr_aligned
+#else
+# define STRRCHR strrchr
+#endif
+
+LEAF(STRRCHR, 6)
+ slli.d t0, a0, 3
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ ld.d t2, a0, 0
+
+ andi a1, a1, 0xff
+ ori a2, a2, 0x101
+ li.d t3, -1
+ bstrins.d a2, a2, 63, 32
+
+ sll.d t5, t3, t0
+ slli.d a3, a2, 7
+ orn t4, t2, t5
+ mul.d a1, a1, a2
+
+ sub.d t0, t4, a2
+ andn t1, a3, t4
+ and t1, t0, t1
+ beqz t1, L(find_tail)
+
+
+ ctz.d t0, t1
+ orn t0, zero, t0
+ xor t2, t4, a1
+ srl.d t0, t3, t0
+
+ orn t2, t2, t0
+ orn t2, t2, t5
+ revb.d t2, t2
+ sub.d t1, t2, a2
+
+ andn t0, a3, t2
+ and t1, t0, t1
+ ctz.d t0, t1
+ srli.d t0, t0, 3
+
+ addi.d a0, a0, 7
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+
+L(find_tail):
+ addi.d a4, a0, 8
+ addi.d a0, a0, 8
+L(loop_ascii):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+
+ and t0, t1, a3
+ bnez t0, L(more_check)
+ ld.d t2, a0, 8
+ sub.d t1, t2, a2
+
+ and t0, t1, a3
+ addi.d a0, a0, 16
+ beqz t0, L(loop_ascii)
+ addi.d a0, a0, -8
+
+L(more_check):
+ andn t0, a3, t2
+ and t1, t1, t0
+ bnez t1, L(tail)
+ addi.d a0, a0, 8
+
+
+L(loop_nonascii):
+ ld.d t2, a0, 0
+ sub.d t1, t2, a2
+ andn t0, a3, t2
+ and t1, t0, t1
+
+ bnez t1, L(tail)
+ ld.d t2, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t2, a2
+
+ andn t0, a3, t2
+ and t1, t0, t1
+ beqz t1, L(loop_nonascii)
+ addi.d a0, a0, -8
+
+L(tail):
+ ctz.d t0, t1
+ orn t0, zero, t0
+ xor t2, t2, a1
+ srl.d t0, t3, t0
+
+
+ orn t2, t2, t0
+ revb.d t2, t2
+ sub.d t1, t2, a2
+ andn t0, a3, t2
+
+ and t1, t0, t1
+ bnez t1, L(count_pos)
+L(find_loop):
+ beq a0, a4, L(find_end)
+ ld.d t2, a0, -8
+
+ addi.d a0, a0, -8
+ xor t2, t2, a1
+ sub.d t1, t2, a2
+ andn t0, a3, t2
+
+ and t1, t0, t1
+ beqz t1, L(find_loop)
+ revb.d t2, t2
+ sub.d t1, t2, a2
+
+
+ andn t0, a3, t2
+ and t1, t0, t1
+L(count_pos):
+ ctz.d t0, t1
+ addi.d a0, a0, 7
+
+ srli.d t0, t0, 3
+ sub.d a0, a0, t0
+ jr ra
+ nop
+
+L(find_end):
+ xor t2, t4, a1
+ orn t2, t2, t5
+ revb.d t2, t2
+ sub.d t1, t2, a2
+
+
+ andn t0, a3, t2
+ and t1, t0, t1
+ ctz.d t0, t1
+ srli.d t0, t0, 3
+
+ addi.d a0, a4, -1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+END(STRRCHR)
+
+libc_hidden_builtin_def(STRRCHR)
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
new file mode 100644
index 0000000..5a6e229
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S
@@ -0,0 +1,176 @@
+/* Optimized strrchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#define STRRCHR __strrchr_lasx
+
+LEAF(STRRCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 5, 0
+ xvld xr0, a0, 0
+ xvld xr1, a0, 32
+
+ li.d t2, -1
+ xvreplgr2vr.b xr4, a1
+ xvmsknz.b xr2, xr0
+ xvmsknz.b xr3, xr1
+
+ xvpickve.w xr5, xr2, 4
+ xvpickve.w xr6, xr3, 4
+ vilvl.h vr2, vr5, vr2
+ vilvl.h vr3, vr6, vr3
+
+ vilvl.w vr2, vr3, vr2
+ movfr2gr.d t0, fa2
+ sra.d t0, t0, a2
+ beq t0, t2, L(find_tail)
+
+
+ xvseq.b xr2, xr0, xr4
+ xvseq.b xr3, xr1, xr4
+ xvmsknz.b xr2, xr2
+ xvmsknz.b xr3, xr3
+
+ xvpickve.w xr4, xr2, 4
+ xvpickve.w xr5, xr3, 4
+ vilvl.h vr2, vr4, vr2
+ vilvl.h vr3, vr5, vr3
+
+ vilvl.w vr1, vr3, vr2
+ slli.d t3, t2, 1
+ movfr2gr.d t1, fa1
+ cto.d t0, t0
+
+ srl.d t1, t1, a2
+ sll.d t3, t3, t0
+ addi.d a0, a2, 63
+ andn t1, t1, t3
+
+
+ clz.d t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+ .align 5
+L(find_tail):
+ addi.d a3, a0, 64
+L(loop):
+ xvld xr2, a0, 64
+ xvld xr3, a0, 96
+ addi.d a0, a0, 64
+
+ xvmin.bu xr5, xr2, xr3
+ xvsetanyeqz.b fcc0, xr5
+ bceqz fcc0, L(loop)
+ xvmsknz.b xr5, xr2
+
+
+ xvmsknz.b xr6, xr3
+ xvpickve.w xr7, xr5, 4
+ xvpickve.w xr8, xr6, 4
+ vilvl.h vr5, vr7, vr5
+
+ vilvl.h vr6, vr8, vr6
+ xvseq.b xr2, xr2, xr4
+ xvseq.b xr3, xr3, xr4
+ xvmsknz.b xr2, xr2
+
+ xvmsknz.b xr3, xr3
+ xvpickve.w xr7, xr2, 4
+ xvpickve.w xr8, xr3, 4
+ vilvl.h vr2, vr7, vr2
+
+ vilvl.h vr3, vr8, vr3
+ vilvl.w vr5, vr6, vr5
+ vilvl.w vr2, vr3, vr2
+ movfr2gr.d t0, fa5
+
+
+ movfr2gr.d t1, fa2
+ slli.d t3, t2, 1
+ cto.d t0, t0
+ sll.d t3, t3, t0
+
+ andn t1, t1, t3
+ beqz t1, L(find_loop)
+ clz.d t0, t1
+ addi.d a0, a0, 63
+
+ sub.d a0, a0, t0
+ jr ra
+L(find_loop):
+ beq a0, a3, L(find_end)
+ xvld xr2, a0, -64
+
+ xvld xr3, a0, -32
+ addi.d a0, a0, -64
+ xvseq.b xr2, xr2, xr4
+ xvseq.b xr3, xr3, xr4
+
+
+ xvmax.bu xr5, xr2, xr3
+ xvseteqz.v fcc0, xr5
+ bcnez fcc0, L(find_loop)
+ xvmsknz.b xr0, xr2
+
+ xvmsknz.b xr1, xr3
+ xvpickve.w xr2, xr0, 4
+ xvpickve.w xr3, xr1, 4
+ vilvl.h vr0, vr2, vr0
+
+ vilvl.h vr1, vr3, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+ addi.d a0, a0, 63
+
+ clz.d t0, t0
+ sub.d a0, a0, t0
+ jr ra
+ nop
+
+
+L(find_end):
+ xvseq.b xr2, xr0, xr4
+ xvseq.b xr3, xr1, xr4
+ xvmsknz.b xr2, xr2
+ xvmsknz.b xr3, xr3
+
+ xvpickve.w xr4, xr2, 4
+ xvpickve.w xr5, xr3, 4
+ vilvl.h vr2, vr4, vr2
+ vilvl.h vr3, vr5, vr3
+
+ vilvl.w vr1, vr3, vr2
+ movfr2gr.d t1, fa1
+ addi.d a0, a2, 63
+ srl.d t1, t1, a2
+
+ clz.d t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+END(STRRCHR)
+
+libc_hidden_builtin_def(STRRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
new file mode 100644
index 0000000..8f2fd22
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S
@@ -0,0 +1,144 @@
+/* Optimized strrchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+#define STRRCHR __strrchr_lsx
+
+LEAF(STRRCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 4, 0
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+
+ li.d t2, -1
+ vreplgr2vr.b vr4, a1
+ vmsknz.b vr2, vr0
+ vmsknz.b vr3, vr1
+
+ vilvl.h vr2, vr3, vr2
+ movfr2gr.s t0, fa2
+ sra.w t0, t0, a2
+ beq t0, t2, L(find_tail)
+
+ vseq.b vr2, vr0, vr4
+ vseq.b vr3, vr1, vr4
+ vmsknz.b vr2, vr2
+ vmsknz.b vr3, vr3
+
+
+ vilvl.h vr1, vr3, vr2
+ slli.d t3, t2, 1
+ movfr2gr.s t1, fa1
+ cto.w t0, t0
+
+ srl.w t1, t1, a2
+ sll.d t3, t3, t0
+ addi.d a0, a2, 31
+ andn t1, t1, t3
+
+ clz.w t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+
+ .align 5
+L(find_tail):
+ addi.d a3, a0, 32
+L(loop):
+ vld vr2, a0, 32
+ vld vr3, a0, 48
+ addi.d a0, a0, 32
+
+ vmin.bu vr5, vr2, vr3
+ vsetanyeqz.b fcc0, vr5
+ bceqz fcc0, L(loop)
+ vmsknz.b vr5, vr2
+
+ vmsknz.b vr6, vr3
+ vilvl.h vr5, vr6, vr5
+ vseq.b vr2, vr2, vr4
+ vseq.b vr3, vr3, vr4
+
+ vmsknz.b vr2, vr2
+ vmsknz.b vr3, vr3
+ vilvl.h vr2, vr3, vr2
+ movfr2gr.s t0, fa5
+
+
+ movfr2gr.s t1, fa2
+ slli.d t3, t2, 1
+ cto.w t0, t0
+ sll.d t3, t3, t0
+
+ andn t1, t1, t3
+ beqz t1, L(find_loop)
+ clz.w t0, t1
+ addi.d a0, a0, 31
+
+ sub.d a0, a0, t0
+ jr ra
+L(find_loop):
+ beq a0, a3, L(find_end)
+ vld vr2, a0, -32
+
+ vld vr3, a0, -16
+ addi.d a0, a0, -32
+ vseq.b vr2, vr2, vr4
+ vseq.b vr3, vr3, vr4
+
+
+ vmax.bu vr5, vr2, vr3
+ vseteqz.v fcc0, vr5
+ bcnez fcc0, L(find_loop)
+ vmsknz.b vr0, vr2
+
+ vmsknz.b vr1, vr3
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ addi.d a0, a0, 31
+
+ clz.w t0, t0
+ sub.d a0, a0, t0
+ jr ra
+ nop
+
+L(find_end):
+ vseq.b vr2, vr0, vr4
+ vseq.b vr3, vr1, vr4
+ vmsknz.b vr2, vr2
+ vmsknz.b vr3, vr3
+
+
+ vilvl.h vr1, vr3, vr2
+ movfr2gr.s t1, fa1
+ addi.d a0, a2, 31
+ srl.w t1, t1, a2
+
+ clz.w t0, t1
+ sub.d a0, a0, t0
+ maskeqz a0, a0, t1
+ jr ra
+END(STRRCHR)
+
+libc_hidden_builtin_def(STRRCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c
new file mode 100644
index 0000000..d9c9f66
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c
@@ -0,0 +1,36 @@
+/* Multiple versions of strrchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define strrchr __redirect_strrchr
+# include <string.h>
+# undef strrchr
+
+# define SYMBOL_NAME strrchr
+# include "ifunc-strrchr.h"
+
+libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ());
+weak_alias (strrchr, rindex)
+# ifdef SHARED
+__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr);
+# endif
+
+#endif