diff options
author | dengjianbo <dengjianbo@loongson.cn> | 2023-08-28 10:08:36 +0800 |
---|---|---|
committer | caiyinyu <caiyinyu@loongson.cn> | 2023-08-29 10:35:38 +0800 |
commit | 60bcb9acbfcb40d1b613a13b539cb75e500b4ad6 (patch) | |
tree | f204b7309aa842014f41d3bcef3eecac02e53ce4 | |
parent | f8664fe2155eb5ddc22272bac72ab26368735718 (diff) | |
download | glibc-60bcb9acbfcb40d1b613a13b539cb75e500b4ad6.zip glibc-60bcb9acbfcb40d1b613a13b539cb75e500b4ad6.tar.gz glibc-60bcb9acbfcb40d1b613a13b539cb75e500b4ad6.tar.bz2 |
LoongArch: Add ifunc support for memchr{aligned, lsx, lasx}
According to glibc memchr microbenchmark, this implementation could reduce
the runtime as following:
Name Percent of runtime reduced
memchr-lasx 37%-83%
memchr-lsx 30%-66%
memchr-aligned 0%-15%
-rw-r--r-- | sysdeps/loongarch/lp64/multiarch/Makefile | 3 | ||||
-rw-r--r-- | sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c | 7 | ||||
-rw-r--r-- | sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h | 40 | ||||
-rw-r--r-- | sysdeps/loongarch/lp64/multiarch/memchr-aligned.S | 95 | ||||
-rw-r--r-- | sysdeps/loongarch/lp64/multiarch/memchr-lasx.S | 117 | ||||
-rw-r--r-- | sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 | ||||
-rw-r--r-- | sysdeps/loongarch/lp64/multiarch/memchr.c | 37 |
7 files changed, 401 insertions, 0 deletions
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile index 64416b0..2f4802c 100644 --- a/sysdeps/loongarch/lp64/multiarch/Makefile +++ b/sysdeps/loongarch/lp64/multiarch/Makefile @@ -24,5 +24,8 @@ sysdep_routines += \ rawmemchr-aligned \ rawmemchr-lsx \ rawmemchr-lasx \ + memchr-aligned \ + memchr-lsx \ + memchr-lasx \ # sysdep_routines endif diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c index 3db9af1..a567b9c 100644 --- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c @@ -102,5 +102,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) ) + IFUNC_IMPL (i, name, memchr, +#if !defined __loongarch_soft_float + IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx) + IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx) +#endif + IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned) + ) return i; } diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h new file mode 100644 index 0000000..9060ccd --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h @@ -0,0 +1,40 @@ +/* Common definition for memchr ifunc selections. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <ldsodefs.h> +#include <ifunc-init.h> + +#if !defined __loongarch_soft_float +extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; +#endif +extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ +#if !defined __loongarch_soft_float + if (SUPPORT_LASX) + return OPTIMIZE (lasx); + else if (SUPPORT_LSX) + return OPTIMIZE (lsx); + else +#endif + return OPTIMIZE (aligned); +} diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S new file mode 100644 index 0000000..81d0d00 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S @@ -0,0 +1,95 @@ +/* Optimized memchr implementation using basic LoongArch instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <sys/regdef.h> +#include <sys/asm.h> + +#if IS_IN (libc) +# define MEMCHR_NAME __memchr_aligned +#else +# define MEMCHR_NAME memchr +#endif + +LEAF(MEMCHR_NAME, 6) + beqz a2, L(out) + andi t1, a0, 0x7 + add.d a5, a0, a2 + bstrins.d a0, zero, 2, 0 + + ld.d t0, a0, 0 + bstrins.d a1, a1, 15, 8 + lu12i.w a3, 0x01010 + slli.d t2, t1, 03 + + bstrins.d a1, a1, 31, 16 + ori a3, a3, 0x101 + li.d t7, -1 + li.d t8, 8 + + bstrins.d a1, a1, 63, 32 + bstrins.d a3, a3, 63, 32 + sll.d t2, t7, t2 + xor t0, t0, a1 + + + addi.d a6, a5, -1 + slli.d a4, a3, 7 + sub.d t1, t8, t1 + orn t0, t0, t2 + + sub.d t2, t0, a3 + andn t3, a4, t0 + bstrins.d a6, zero, 2, 0 + and t0, t2, t3 + + bgeu t1, a2, L(end) +L(loop): + bnez t0, L(found) + ld.d t1, a0, 8 + xor t0, t1, a1 + + addi.d a0, a0, 8 + sub.d t2, t0, a3 + andn t3, a4, t0 + and t0, t2, t3 + + + bne a0, a6, L(loop) +L(end): + sub.d t1, a5, a6 + ctz.d t0, t0 + srli.d t0, t0, 3 + + sltu t1, t0, t1 + add.d a0, a0, t0 + maskeqz a0, a0, t1 + jr ra + +L(found): + ctz.d t0, t0 + srli.d t0, t0, 3 + add.d a0, a0, t0 + jr ra + +L(out): + move a0, zero + jr ra +END(MEMCHR_NAME) + +libc_hidden_builtin_def (MEMCHR_NAME) diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S new file mode 100644 index 0000000..a26cdf4 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S @@ -0,0 +1,117 @@ +/* Optimized memchr implementation using LoongArch LASX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <sys/regdef.h> +#include <sys/asm.h> + +#if IS_IN (libc) && !defined __loongarch_soft_float + +# define MEMCHR __memchr_lasx + +LEAF(MEMCHR, 6) + beqz a2, L(ret0) + add.d a3, a0, a2 + andi t0, a0, 0x3f + bstrins.d a0, zero, 5, 0 + + xvld xr0, a0, 0 + xvld xr1, a0, 32 + li.d t1, -1 + li.d t2, 64 + + xvreplgr2vr.b xr2, a1 + sll.d t3, t1, t0 + sub.d t2, t2, t0 + xvseq.b xr0, xr0, xr2 + + xvseq.b xr1, xr1, xr2 + xvmsknz.b xr0, xr0 + xvmsknz.b xr1, xr1 + xvpickve.w xr3, xr0, 4 + + + xvpickve.w xr4, xr1, 4 + vilvl.h vr0, vr3, vr0 + vilvl.h vr1, vr4, vr1 + vilvl.w vr0, vr1, vr0 + + movfr2gr.d t0, fa0 + and t0, t0, t3 + bgeu t2, a2, L(end) + bnez t0, L(found) + + addi.d a4, a3, -1 + bstrins.d a4, zero, 5, 0 +L(loop): + xvld xr0, a0, 64 + xvld xr1, a0, 96 + + addi.d a0, a0, 64 + xvseq.b xr0, xr0, xr2 + xvseq.b xr1, xr1, xr2 + beq a0, a4, L(out) + + + xvmax.bu xr3, xr0, xr1 + xvseteqz.v fcc0, xr3 + bcnez fcc0, L(loop) + xvmsknz.b xr0, xr0 + + xvmsknz.b xr1, xr1 + xvpickve.w xr3, xr0, 4 + xvpickve.w xr4, xr1, 4 + vilvl.h vr0, vr3, vr0 + + vilvl.h vr1, vr4, vr1 + vilvl.w vr0, vr1, vr0 + movfr2gr.d t0, fa0 +L(found): + ctz.d t1, t0 + + add.d a0, a0, t1 + jr ra +L(ret0): + move a0, zero + jr ra + + +L(out): + xvmsknz.b xr0, xr0 + xvmsknz.b xr1, xr1 + xvpickve.w xr3, xr0, 4 + xvpickve.w xr4, xr1, 4 + + vilvl.h vr0, vr3, vr0 + vilvl.h vr1, vr4, vr1 + vilvl.w vr0, vr1, vr0 + movfr2gr.d t0, fa0 + +L(end): + sub.d t2, zero, a3 + srl.d t1, t1, t2 + and t0, t0, t1 + ctz.d t1, t0 + + add.d a0, a0, t1 + maskeqz a0, a0, t0 + jr ra +END(MEMCHR) + +libc_hidden_builtin_def (MEMCHR) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S new file mode 100644 index 0000000..a73ecd2 --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S @@ -0,0 +1,102 @@ +/* Optimized memchr implementation using LoongArch LSX instructions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <sys/regdef.h> +#include <sys/asm.h> + +#if IS_IN (libc) && !defined __loongarch_soft_float + +# define MEMCHR __memchr_lsx + +LEAF(MEMCHR, 6) + beqz a2, L(ret0) + add.d a3, a0, a2 + andi t0, a0, 0x1f + bstrins.d a0, zero, 4, 0 + + vld vr0, a0, 0 + vld vr1, a0, 16 + li.d t1, -1 + li.d t2, 32 + + vreplgr2vr.b vr2, a1 + sll.d t3, t1, t0 + sub.d t2, t2, t0 + vseq.b vr0, vr0, vr2 + + vseq.b vr1, vr1, vr2 + vmsknz.b vr0, vr0 + vmsknz.b vr1, vr1 + vilvl.h vr0, vr1, vr0 + + + movfr2gr.s t0, fa0 + and t0, t0, t3 + bgeu t2, a2, L(end) + bnez t0, L(found) + + addi.d a4, a3, -1 + bstrins.d a4, zero, 4, 0 +L(loop): + vld vr0, a0, 32 + vld vr1, a0, 48 + + addi.d a0, a0, 32 + vseq.b vr0, vr0, vr2 + vseq.b vr1, vr1, vr2 + beq a0, a4, L(out) + + vmax.bu vr3, vr0, vr1 + vseteqz.v fcc0, vr3 + bcnez fcc0, L(loop) + vmsknz.b vr0, vr0 + + + vmsknz.b vr1, vr1 + vilvl.h vr0, vr1, vr0 + movfr2gr.s t0, fa0 +L(found): + ctz.w t0, t0 + + add.d a0, a0, t0 + jr ra +L(ret0): + move a0, zero + jr ra + +L(out): + vmsknz.b vr0, vr0 + vmsknz.b vr1, vr1 + vilvl.h vr0, vr1, vr0 + movfr2gr.s t0, fa0 + +L(end): + sub.d t2, zero, a3 + srl.w t1, t1, t2 + and t0, t0, t1 + ctz.w t1, t0 + + + add.d a0, a0, t1 + maskeqz a0, a0, t0 + jr ra +END(MEMCHR) + +libc_hidden_builtin_def (MEMCHR) +#endif diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c new file mode 100644 index 0000000..059479c --- /dev/null +++ b/sysdeps/loongarch/lp64/multiarch/memchr.c @@ -0,0 +1,37 @@ +/* Multiple versions of memchr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in libc. */ +#if IS_IN (libc) +# define memchr __redirect_memchr +# include <string.h> +# undef memchr + +# define SYMBOL_NAME memchr +# include "ifunc-memchr.h" + +libc_ifunc_redirected (__redirect_memchr, memchr, + IFUNC_SELECTOR ()); + +# ifdef SHARED +__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr) + __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr); +# endif + +#endif |