Diffstat (limited to 'newlib/libc/machine/riscv')
-rw-r--r--  newlib/libc/machine/riscv/Makefile.inc    |    4
-rw-r--r--  newlib/libc/machine/riscv/memchr.c        |  152
-rw-r--r--  newlib/libc/machine/riscv/memcpy-asm.S    |   12
-rw-r--r--  newlib/libc/machine/riscv/memcpy.c        |  163
-rw-r--r--  newlib/libc/machine/riscv/memmove-asm.S   |   40
-rw-r--r--  newlib/libc/machine/riscv/memmove-stub.c  |   14
-rw-r--r--  newlib/libc/machine/riscv/memmove.S       |   40
-rw-r--r--  newlib/libc/machine/riscv/memmove.c       |  259
-rw-r--r--  newlib/libc/machine/riscv/memrchr.c       |  172
-rw-r--r--  newlib/libc/machine/riscv/memset.S        |  349
-rw-r--r--  newlib/libc/machine/riscv/rv_string.h     |   51
-rw-r--r--  newlib/libc/machine/riscv/setjmp.S        |   78
-rw-r--r--  newlib/libc/machine/riscv/strcmp.S        |  198
-rw-r--r--  newlib/libc/machine/riscv/strlen.c        |   19
-rw-r--r--  newlib/libc/machine/riscv/xlenint.h       |    7
15 files changed, 1240 insertions, 318 deletions
diff --git a/newlib/libc/machine/riscv/Makefile.inc b/newlib/libc/machine/riscv/Makefile.inc
index 4d6c046..3cc6e19 100644
--- a/newlib/libc/machine/riscv/Makefile.inc
+++ b/newlib/libc/machine/riscv/Makefile.inc
@@ -1,3 +1,3 @@
libc_a_SOURCES += \
- %D%/memmove.S %D%/memmove-stub.c %D%/memset.S %D%/memcpy-asm.S %D%/memcpy.c %D%/strlen.c \
- %D%/strcpy.c %D%/stpcpy.c %D%/strcmp.S %D%/setjmp.S %D%/ieeefp.c %D%/ffs.c
+ %D%/memmove-asm.S %D%/memmove.c %D%/memset.S %D%/memcpy-asm.S %D%/memcpy.c %D%/strlen.c \
+ %D%/strcpy.c %D%/stpcpy.c %D%/strcmp.S %D%/memchr.c %D%/memrchr.c %D%/setjmp.S %D%/ieeefp.c %D%/ffs.c
diff --git a/newlib/libc/machine/riscv/memchr.c b/newlib/libc/machine/riscv/memchr.c
new file mode 100644
index 0000000..62a7d19
--- /dev/null
+++ b/newlib/libc/machine/riscv/memchr.c
@@ -0,0 +1,152 @@
+/*
+FUNCTION
+ <<memchr>>---find character in memory
+
+INDEX
+ memchr
+
+SYNOPSIS
+ #include <string.h>
+ void *memchr(const void *<[src]>, int <[c]>, size_t <[length]>);
+
+DESCRIPTION
+ This function searches memory starting at <<*<[src]>>> for the
+ character <[c]>. The search only ends with the first
+ occurrence of <[c]>, or after <[length]> characters; in
+ particular, <<NUL>> does not terminate the search.
+
+RETURNS
+ If the character <[c]> is found within <[length]> characters
+ of <<*<[src]>>>, a pointer to the character is returned. If
+ <[c]> is not found, then <<NULL>> is returned.
+
+PORTABILITY
+<<memchr>> is ANSI C.
+
+<<memchr>> requires no supporting OS subroutines.
+
+QUICKREF
+ memchr ansi pure
+*/
+
+#include <sys/asm.h>
+#include <stddef.h>
+#include "rv_string.h"
+
+// Move size
+#if __riscv_zilsd
+#define MV_SZ 8
+#else
+#define MV_SZ SZREG
+#endif
+
+
+void *
+memchr (const void *src_void,
+ int c,
+ size_t length)
+{
+ const unsigned char *src = (const unsigned char *) src_void;
+ unsigned char d = c;
+
+#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+ size_t align = (uintptr_t) src & (MV_SZ - 1);
+
+ if (align)
+ {
+ align = MV_SZ - align;
+
+ if (length < align) align = length;
+
+ switch (align)
+ {
+#if MV_SZ == 8
+ case 7:
+ if (*src++ == d) return (void *) (src - 1);
+ case 6:
+ if (*src++ == d) return (void *) (src - 1);
+ case 5:
+ if (*src++ == d) return (void *) (src - 1);
+ case 4:
+ if (*src++ == d) return (void *) (src - 1);
+#endif /* MV_SZ == 8 */
+ case 3:
+ if (*src++ == d) return (void *) (src - 1);
+ case 2:
+ if (*src++ == d) return (void *) (src - 1);
+ case 1:
+ if (*src++ == d) return (void *) (src - 1);
+ }
+
+ length -= align;
+ }
+
+ const unsigned char *end_addr = src + (length & ~(MV_SZ - 1));
+
+ if (src < end_addr)
+ {
+ uintxlen_t mask = __libc_splat_byte(d);
+
+ do
+ {
+ uintlslen_t val = *(uintlslen_t*) src;
+ uintxlen_t word1 = val ^ mask;
+
+ if (__libc_detect_null(word1))
+ {
+#if __riscv_zbb
+ word1 = ~__LIBC_RISCV_ZBB_ORC_B(word1);
+ word1 = __LIBC_RISCV_ZBB_CNT_Z(word1);
+
+ return (void *) (src + (word1 >> 3));
+#else /* not __riscv_zbb */
+ if (*src++ == d) return (void *) (src - 1);
+ if (*src++ == d) return (void *) (src - 1);
+ if (*src++ == d) return (void *) (src - 1);
+#if __riscv_xlen == 64
+ if (*src++ == d) return (void *) (src - 1);
+ if (*src++ == d) return (void *) (src - 1);
+ if (*src++ == d) return (void *) (src - 1);
+ if (*src++ == d) return (void *) (src - 1);
+#endif /* __riscv_xlen == 64 */
+ return (void *) src;
+#endif /* __riscv_zbb */
+ }
+#if __riscv_zilsd
+ uintxlen_t word2 = (val >> 32);
+ word2 ^= mask;
+
+ if (__libc_detect_null(word2))
+ {
+ src += MV_SZ / 2;
+#if __riscv_zbb
+ word2 = ~__LIBC_RISCV_ZBB_ORC_B(word2);
+ word2 = __LIBC_RISCV_ZBB_CNT_Z(word2);
+
+ return (void *) (src + (word2 >> 3));
+#else /* not __riscv_zbb */
+ if (*src++ == d) return (void *) (src - 1);
+ if (*src++ == d) return (void *) (src - 1);
+ if (*src++ == d) return (void *) (src - 1);
+ return (void *) src;
+#endif /* __riscv_zbb */
+ }
+#endif /* __riscv_zilsd */
+
+ src += MV_SZ;
+ } while (src < end_addr);
+
+ length &= MV_SZ - 1;
+ }
+
+#endif /* not PREFER_SIZE_OVER_SPEED */
+
+ while (length--)
+ {
+ if (*src == d)
+ return (void *) src;
+ src++;
+ }
+
+ return NULL;
+}
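For reference, the word-at-a-time scan above boils down to the following C sketch. It is illustrative only, assuming a 32-bit little-endian word; the patch itself uses uintxlen_t and the helpers from rv_string.h, and the names below are hypothetical stand-ins.

    #include <stdint.h>

    /* Hypothetical stand-ins for the rv_string.h helpers. */
    static uint32_t splat_byte32 (unsigned char c)
    {
      uint32_t v = ((uint32_t) c << 8) | c;
      return (v << 16) | v;
    }

    static int has_zero_byte32 (uint32_t x)
    {
      return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
    }

    /* Return the offset of the first byte equal to d inside one aligned
       little-endian word, or -1 if the word does not contain it. */
    static int word_index_of (uint32_t word, unsigned char d)
    {
      uint32_t x = word ^ splat_byte32 (d);   /* matching bytes become 0x00 */
      if (!has_zero_byte32 (x))
        return -1;
      for (int i = 0; i < 4; i++)
        if (((x >> (8 * i)) & 0xff) == 0)
          return i;
      return -1;
    }

The Zbb path in the patch replaces the final byte loop with orc.b plus a count of (leading or trailing) zeros.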
diff --git a/newlib/libc/machine/riscv/memcpy-asm.S b/newlib/libc/machine/riscv/memcpy-asm.S
index 5571e47..2771285 100644
--- a/newlib/libc/machine/riscv/memcpy-asm.S
+++ b/newlib/libc/machine/riscv/memcpy-asm.S
@@ -14,15 +14,15 @@
.global memcpy
.type memcpy, @function
memcpy:
- mv t1, a0
+ mv a3, a0
beqz a2, 2f
1:
- lb t2, 0(a1)
- sb t2, 0(t1)
- add a2, a2, -1
- add t1, t1, 1
- add a1, a1, 1
+ lbu a4, 0(a1)
+ sb a4, 0(a3)
+ addi a2, a2, -1
+ addi a3, a3, 1
+ addi a1, a1, 1
bnez a2, 1b
2:
diff --git a/newlib/libc/machine/riscv/memcpy.c b/newlib/libc/machine/riscv/memcpy.c
index e1a34a8..a27e0ec 100644
--- a/newlib/libc/machine/riscv/memcpy.c
+++ b/newlib/libc/machine/riscv/memcpy.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2017 SiFive Inc. All rights reserved.
+ Copyright (c) 2025 Mahmoud Abumandour <ma.mandourr@gmail.com>
This copyrighted material is made available to anyone wishing to use,
modify, copy, or redistribute it subject to the terms and conditions
@@ -10,83 +11,137 @@
*/
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
-//memcpy defined in memcpy-asm.S
+// memcpy defined in memcpy-asm.S
#else
-#include <string.h>
-#include <stdint.h>
#include "../../string/local.h"
+#include "xlenint.h"
+#include <string.h>
+#include <sys/asm.h>
#define unlikely(X) __builtin_expect (!!(X), 0)
-void *
-__inhibit_loop_to_libcall
-memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
+static inline void
+__libc_memcpy_bytewise (unsigned char *dst, const unsigned char *src,
+ const size_t sz)
{
- #define BODY(a, b, t) { \
- t tt = *b; \
- a++, b++; \
- *(a - 1) = tt; \
- }
+ const unsigned char *end = dst + sz;
+ while (dst < end)
+ *dst++ = *src++;
+}
- char *a = (char *)aa;
- const char *b = (const char *)bb;
- char *end = a + n;
- uintptr_t msk = sizeof (long) - 1;
-#if __riscv_misaligned_slow || __riscv_misaligned_fast
- if (n < sizeof (long))
-#else
- if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
- || n < sizeof (long)))
+#ifndef __riscv_misaligned_fast
+static uintxlen_t
+__libc_load_xlen (const void *src)
+{
+ const unsigned char *p = (const unsigned char *)src;
+ uintxlen_t ret = 0;
+ unsigned char b0 = *p++;
+ unsigned char b1 = *p++;
+ unsigned char b2 = *p++;
+ unsigned char b3 = *p++;
+ ret = (uintxlen_t)b0 | ((uintxlen_t)b1 << 8) | ((uintxlen_t)b2 << 16)
+ | ((uintxlen_t)b3 << 24);
+#if __riscv_xlen == 64
+ unsigned char b4 = *p++;
+ unsigned char b5 = *p++;
+ unsigned char b6 = *p++;
+ unsigned char b7 = *p++;
+ ret |= ((uintxlen_t)b4 << 32) | ((uintxlen_t)b5 << 40)
+ | ((uintxlen_t)b6 << 48) | ((uintxlen_t)b7 << 56);
+#endif
+ return ret;
+}
#endif
+
+void *
+__inhibit_loop_to_libcall
+memcpy (void *__restrict aa, const void *__restrict bb, size_t n)
+{
+ unsigned char *a = (unsigned char *)aa;
+ const unsigned char *b = (const unsigned char *)bb;
+ unsigned char *end = a + n;
+ uintptr_t msk = SZREG - 1;
+ if (n < SZREG)
{
-small:
if (__builtin_expect (a < end, 1))
- while (a < end)
- BODY (a, b, char);
+ __libc_memcpy_bytewise (a, b, n);
return aa;
}
+/*
+ * If misaligned access is slow or prohibited, and the alignments of the source
+ * and destination are different, we align the destination to do XLEN stores.
+ * This uses only one aligned store for every four (or eight for XLEN == 64)
+ * bytes of data.
+ */
+#ifndef __riscv_misaligned_fast
+ if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))))
+ {
+ size_t dst_pad = (uintptr_t)a & msk;
+ dst_pad = (SZREG - dst_pad) & msk;
+ __libc_memcpy_bytewise (a, b, dst_pad);
+ a += dst_pad;
+ b += dst_pad;
+
+ uintxlen_t *la = (uintxlen_t *)a;
+ const unsigned char *cb = (const unsigned char *)b;
+ uintxlen_t *lend = (uintxlen_t *)((uintptr_t)end & ~msk);
+
+ while (la < lend)
+ {
+ *la++ = __libc_load_xlen (cb);
+ cb += SZREG;
+ }
+ a = (unsigned char *)la;
+ b = (const unsigned char *)cb;
+ if (unlikely (a < end))
+ __libc_memcpy_bytewise (a, b, end - a);
+ return aa;
+ }
+#endif
+
if (unlikely (((uintptr_t)a & msk) != 0))
- while ((uintptr_t)a & msk)
- BODY (a, b, char);
+ {
+ size_t pad = SZREG - ((uintptr_t)a & msk);
+ __libc_memcpy_bytewise (a, b, pad);
+ a += pad;
+ b += pad;
+ }
- long *la = (long *)a;
- const long *lb = (const long *)b;
- long *lend = (long *)((uintptr_t)end & ~msk);
+ uintxlen_t *la = (uintxlen_t *)a;
+ const uintxlen_t *lb = (const uintxlen_t *)b;
+ uintxlen_t *lend = (uintxlen_t *)((uintptr_t)end & ~msk);
if (unlikely (lend - la > 8))
{
while (lend - la > 8)
- {
- long b0 = *lb++;
- long b1 = *lb++;
- long b2 = *lb++;
- long b3 = *lb++;
- long b4 = *lb++;
- long b5 = *lb++;
- long b6 = *lb++;
- long b7 = *lb++;
- long b8 = *lb++;
- *la++ = b0;
- *la++ = b1;
- *la++ = b2;
- *la++ = b3;
- *la++ = b4;
- *la++ = b5;
- *la++ = b6;
- *la++ = b7;
- *la++ = b8;
- }
+ {
+ uintxlen_t b0 = *lb++;
+ uintxlen_t b1 = *lb++;
+ uintxlen_t b2 = *lb++;
+ uintxlen_t b3 = *lb++;
+ uintxlen_t b4 = *lb++;
+ uintxlen_t b5 = *lb++;
+ uintxlen_t b6 = *lb++;
+ uintxlen_t b7 = *lb++;
+ uintxlen_t b8 = *lb++;
+ *la++ = b0;
+ *la++ = b1;
+ *la++ = b2;
+ *la++ = b3;
+ *la++ = b4;
+ *la++ = b5;
+ *la++ = b6;
+ *la++ = b7;
+ *la++ = b8;
+ }
}
- while (la < lend)
- BODY (la, lb, long);
-
- a = (char *)la;
- b = (const char *)lb;
+ a = (unsigned char *)la;
+ b = (const unsigned char *)lb;
if (unlikely (a < end))
- goto small;
+ __libc_memcpy_bytewise (a, b, end - a);
return aa;
}
#endif
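The destination-alignment step in the misaligned path above can be summarized by a small helper; this is a sketch under the assumption that the word size is a power of two, and the patch computes the same quantity inline as dst_pad.

    #include <stddef.h>
    #include <stdint.h>

    /* Number of bytes to copy byte-wise so that p becomes word aligned;
       yields 0 when p is already aligned (word must be a power of two). */
    static size_t bytes_until_aligned (const void *p, size_t word)
    {
      return (word - ((uintptr_t) p & (word - 1))) & (word - 1);
    }

After that padding copy, every store in the main loop is aligned, while __libc_load_xlen assembles each source word from individual bytes.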
diff --git a/newlib/libc/machine/riscv/memmove-asm.S b/newlib/libc/machine/riscv/memmove-asm.S
new file mode 100644
index 0000000..061472c
--- /dev/null
+++ b/newlib/libc/machine/riscv/memmove-asm.S
@@ -0,0 +1,40 @@
+/* Copyright (c) 2019 SiFive Inc. All rights reserved.
+
+ This copyrighted material is made available to anyone wishing to use,
+ modify, copy, or redistribute it subject to the terms and conditions
+ of the FreeBSD License. This program is distributed in the hope that
+ it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
+ including the implied warranties of MERCHANTABILITY or FITNESS FOR
+ A PARTICULAR PURPOSE. A copy of this license is available at
+ http://www.opensource.org/licenses.
+*/
+
+#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
+.text
+.global memmove
+.type memmove, @function
+memmove:
+ beqz a2, .Ldone /* in case there are 0 bytes to be copied, return immediately */
+
+ mv a4, a0 /* copy the destination address over to a4, since memmove should return that address in a0 at the end */
+ li a3, 1
+ bgtu a1, a0, .Lcopy /* in case of source address > destination address, copy from start to end of the specified memory area */
+
+ li a3, -1 /* otherwise, start copying from the end of the specified memory area in order to prevent data loss in case of overlapping memory areas. */
+ add a4, a4, a2 /* add the number of bytes to be copied to both addresses. this gives us the address one byte past the end of the memory area we want to copy, */
+ add a1, a1, a2 /* therefore we need to subtract 1 from both addresses in the next step before starting the copying process. */
+
+.Lincrement:
+ add a4, a4, a3 /* in case of source address < destination address, adjust both addresses by -1 before copying any data to obtain the correct start addresses */
+ add a1, a1, a3
+.Lcopy:
+ lbu a5, 0(a1)
+ addi a2, a2, -1 /* copy bytes as long as a2 (= the number of bytes to be copied) > 0. the decrement is done here to relax the RAW dependency between the load and the store */
+ sb a5, 0(a4)
+ bnez a2, .Lincrement
+
+.Ldone:
+ ret
+
+ .size memmove, .-memmove
+#endif
diff --git a/newlib/libc/machine/riscv/memmove-stub.c b/newlib/libc/machine/riscv/memmove-stub.c
deleted file mode 100644
index d882e46..0000000
--- a/newlib/libc/machine/riscv/memmove-stub.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/* Copyright (c) 2019 SiFive Inc. All rights reserved.
-
- This copyrighted material is made available to anyone wishing to use,
- modify, copy, or redistribute it subject to the terms and conditions
- of the FreeBSD License. This program is distributed in the hope that
- it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
- including the implied warranties of MERCHANTABILITY or FITNESS FOR
- A PARTICULAR PURPOSE. A copy of this license is available at
- http://www.opensource.org/licenses.
-*/
-
-#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
-#include "../../string/memmove.c"
-#endif
diff --git a/newlib/libc/machine/riscv/memmove.S b/newlib/libc/machine/riscv/memmove.S
deleted file mode 100644
index 66d9cd4..0000000
--- a/newlib/libc/machine/riscv/memmove.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Copyright (c) 2019 SiFive Inc. All rights reserved.
-
- This copyrighted material is made available to anyone wishing to use,
- modify, copy, or redistribute it subject to the terms and conditions
- of the FreeBSD License. This program is distributed in the hope that
- it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
- including the implied warranties of MERCHANTABILITY or FITNESS FOR
- A PARTICULAR PURPOSE. A copy of this license is available at
- http://www.opensource.org/licenses.
-*/
-
-#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
-.text
-.global memmove
-.type memmove, @function
-memmove:
- beqz a2, 2f
-
- mv t1, a0
- li a3, 1
- bgtu a1, a0, 1f
-
- li a3, -1
- addi a4, a2 , -1
- add t1, t1, a4
- add a1, a1, a4
-
-1:
- lb t2, 0(a1)
- sb t2, 0(t1)
- add a2, a2, -1
- add t1, t1, a3
- add a1, a1, a3
- bnez a2, 1b
-
-2:
- ret
-
- .size memmove, .-memmove
-#endif
diff --git a/newlib/libc/machine/riscv/memmove.c b/newlib/libc/machine/riscv/memmove.c
new file mode 100644
index 0000000..209a75c
--- /dev/null
+++ b/newlib/libc/machine/riscv/memmove.c
@@ -0,0 +1,259 @@
+/* Copyright (c) 2019 SiFive Inc. All rights reserved.
+ Copyright (c) 2025 Marlene Fally <marlene.fally@gmail.com>
+
+ This copyrighted material is made available to anyone wishing to use,
+ modify, copy, or redistribute it subject to the terms and conditions
+ of the FreeBSD License. This program is distributed in the hope that
+ it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
+ including the implied warranties of MERCHANTABILITY or FITNESS FOR
+ A PARTICULAR PURPOSE. A copy of this license is available at
+ http://www.opensource.org/licenses.
+*/
+
+#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
+/* memmove defined in memmove-asm.S */
+#else
+
+#include "../../string/local.h"
+#include "sys/asm.h"
+#include "xlenint.h"
+#include <limits.h>
+#include <stddef.h>
+#include <string.h>
+
+static inline uint8_t
+__libc_fast_xlen_aligned (void *dst, const void *src)
+{
+#if defined(__riscv_misaligned_fast)
+ return 1;
+#else
+ return !(((uintxlen_t)src & (SZREG - 1)) | ((uintxlen_t)dst & (SZREG - 1)));
+#endif
+}
+
+#if !defined(__riscv_misaligned_fast)
+static inline void
+__libc_memmove_misaligned_copy (unsigned char *dst,
+ const uintxlen_t *aligned_src)
+{
+ uintxlen_t src_xlen = *aligned_src;
+
+ *dst++ = (unsigned char)(src_xlen);
+ *dst++ = (unsigned char)(src_xlen >> 8);
+ *dst++ = (unsigned char)(src_xlen >> 16);
+ *dst++ = (unsigned char)(src_xlen >> 24);
+#if __riscv_xlen == 64
+ *dst++ = (unsigned char)(src_xlen >> 32);
+ *dst++ = (unsigned char)(src_xlen >> 40);
+ *dst++ = (unsigned char)(src_xlen >> 48);
+ *dst++ = (unsigned char)(src_xlen >> 56);
+#endif
+}
+#endif
+
+static inline void
+__libc_aligned_copy_unrolled (uintxlen_t *aligned_dst,
+ const uintxlen_t *aligned_src)
+{
+ uintxlen_t dst0 = *aligned_src++;
+ uintxlen_t dst1 = *aligned_src++;
+ uintxlen_t dst2 = *aligned_src++;
+ uintxlen_t dst3 = *aligned_src++;
+ uintxlen_t dst4 = *aligned_src++;
+ uintxlen_t dst5 = *aligned_src++;
+ uintxlen_t dst6 = *aligned_src++;
+ uintxlen_t dst7 = *aligned_src++;
+ uintxlen_t dst8 = *aligned_src;
+
+ *aligned_dst++ = dst0;
+ *aligned_dst++ = dst1;
+ *aligned_dst++ = dst2;
+ *aligned_dst++ = dst3;
+ *aligned_dst++ = dst4;
+ *aligned_dst++ = dst5;
+ *aligned_dst++ = dst6;
+ *aligned_dst++ = dst7;
+ *aligned_dst = dst8;
+}
+
+static inline void
+__libc_memmove_bytewise_forward_copy (unsigned char *dst,
+ const unsigned char *src, size_t length)
+{
+ while (length--)
+ {
+ *dst++ = *src++;
+ }
+}
+
+void *__inhibit_loop_to_libcall
+memmove (void *dst_void, const void *src_void, size_t length)
+{
+ unsigned char *dst = dst_void;
+ const unsigned char *src = src_void;
+ uintxlen_t *aligned_dst;
+ const uintxlen_t *aligned_src;
+
+ if (src <= dst)
+ {
+ if (dst < src + length) /* Memory areas overlap destructively; we have
+ to copy backwards. */
+ {
+ src += length;
+ dst += length;
+
+ if (length >= SZREG)
+ {
+ if (__libc_fast_xlen_aligned (dst, src))
+ {
+ aligned_dst = (uintxlen_t *)dst;
+ aligned_src = (uintxlen_t *)src;
+
+ /* If possible, unroll the word-copy loop by a factor of 9 to
+ match memcpy. This speeds up the copying process for
+ longer lengths while barely degrading performance for
+ lengths < SZREG*9. Since we are copying backwards,
+ decrement the addresses before copying.
+ */
+ while (length >= SZREG * 9)
+ {
+ aligned_dst -= 9;
+ aligned_src -= 9;
+ __libc_aligned_copy_unrolled (aligned_dst, aligned_src);
+ length -= (SZREG * 9);
+ }
+
+ while (length >= SZREG)
+ {
+ *--aligned_dst = *--aligned_src;
+ length -= SZREG;
+ }
+
+ /* Pick up any residual with a byte copier. */
+ dst = (unsigned char *)aligned_dst;
+ src = (unsigned char *)aligned_src;
+ }
+#if !defined(__riscv_misaligned_fast)
+ else if (length > (SZREG * 2))
+ {
+ /* At least one address is not xlen-aligned. If
+ misaligned accesses are slow or prohibited,
+ align the src so we can load SZREG bytes at a time.
+ This reduces the number of memory accesses made
+ and therefore improves performance.
+ */
+ while ((uintxlen_t)src & (SZREG - 1))
+ {
+ *--dst = *--src;
+ length--;
+ }
+
+ aligned_src = (uintxlen_t *)src;
+
+ /* Decrement the addresses before copying since
+ we are copying backwards. */
+ do
+ {
+ aligned_src--;
+ dst -= SZREG;
+ __libc_memmove_misaligned_copy (dst, aligned_src);
+ length -= SZREG;
+ }
+ while (length >= SZREG);
+
+ /* Pick up any residual with a byte copier. */
+ src = (unsigned char *)aligned_src;
+ }
+#endif
+ }
+ while (length--)
+ {
+ *--dst = *--src;
+ }
+
+ return dst_void;
+ }
+ }
+ else if (src < dst + length) /* Memory areas overlap non-destructively. */
+ {
+ if (length >= SZREG)
+ {
+ if (__libc_fast_xlen_aligned (dst, src))
+ {
+ aligned_dst = (uintxlen_t *)dst;
+ aligned_src = (uintxlen_t *)src;
+
+ /* If possible, unroll the word-copy loop by a factor of 9 to
+ match memcpy. This speeds up the copying process for longer
+ lengths while barely degrading performance for lengths <
+ SZREG*9.
+ */
+ while (length >= SZREG * 9)
+ {
+ __libc_aligned_copy_unrolled (aligned_dst, aligned_src);
+ aligned_dst += 9;
+ aligned_src += 9;
+ length -= (SZREG * 9);
+ }
+
+ while (length >= SZREG)
+ {
+ *aligned_dst++ = *aligned_src++;
+ length -= SZREG;
+ }
+
+ /* Pick up any residual with a byte copier. */
+ dst = (unsigned char *)aligned_dst;
+ src = (unsigned char *)aligned_src;
+ }
+#if !defined(__riscv_misaligned_fast)
+ else if (length > (SZREG * 2))
+ {
+ /* At least one address is not xlen-aligned. If
+ misaligned accesses are slow or prohibited,
+ align the src so we can load SZREG bytes at a time.
+ This reduces the number of memory accesses made
+ and therefore improves performance.
+ */
+ while ((uintxlen_t)src & (SZREG - 1))
+ {
+ *dst++ = *src++;
+ length--;
+ }
+
+ aligned_src = (uintxlen_t *)src;
+
+ do
+ {
+ __libc_memmove_misaligned_copy (dst, aligned_src);
+ aligned_src++;
+ dst += SZREG;
+ length -= SZREG;
+ }
+ while (length >= SZREG);
+
+ /* Pick up any residual with a byte copier. */
+ src = (unsigned char *)aligned_src;
+ }
+#endif
+ }
+
+ __libc_memmove_bytewise_forward_copy (dst, src, length);
+ return dst_void;
+ }
+
+ /* Memory areas do not overlap, redirect to memcpy.
+ Copy byte-by-byte for lengths <= SZREG to reduce
+ overhead on very short copies.
+ */
+ if (length > SZREG)
+ {
+ return memcpy (dst_void, src_void, length);
+ }
+ else
+ {
+ __libc_memmove_bytewise_forward_copy (dst, src, length);
+ return dst_void;
+ }
+}
+#endif
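The branch structure of the new memmove follows the usual overlap classification; below is a minimal sketch, with illustrative names and uintptr_t comparisons standing in for the pointer tests used in the patch.

    #include <stddef.h>
    #include <stdint.h>

    typedef enum { NO_OVERLAP, OVERLAP_FORWARD, OVERLAP_BACKWARD } copy_dir_t;

    static copy_dir_t classify (const void *dst, const void *src, size_t len)
    {
      uintptr_t d = (uintptr_t) dst, s = (uintptr_t) src;

      if (s <= d && d < s + len)
        return OVERLAP_BACKWARD;   /* dst starts inside src: copy from the end */
      if (d < s && s < d + len)
        return OVERLAP_FORWARD;    /* src starts inside dst: forward copy is safe */
      return NO_OVERLAP;           /* disjoint: the patch defers to memcpy */
    }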
diff --git a/newlib/libc/machine/riscv/memrchr.c b/newlib/libc/machine/riscv/memrchr.c
new file mode 100644
index 0000000..47e1023
--- /dev/null
+++ b/newlib/libc/machine/riscv/memrchr.c
@@ -0,0 +1,172 @@
+/*
+FUNCTION
+ <<memrchr>>---reverse search for character in memory
+
+INDEX
+ memrchr
+
+SYNOPSIS
+ #include <string.h>
+ void *memrchr(const void *<[src]>, int <[c]>, size_t <[length]>);
+
+DESCRIPTION
+ This function searches memory starting at <[length]> bytes
+ beyond <<*<[src]>>> backwards for the character <[c]>.
+ The search only ends with the first occurrence of <[c]>; in
+ particular, <<NUL>> does not terminate the search.
+
+RETURNS
+ If the character <[c]> is found within <[length]> characters
+ of <<*<[src]>>>, a pointer to the character is returned. If
+ <[c]> is not found, then <<NULL>> is returned.
+
+PORTABILITY
+<<memrchr>> is a GNU extension.
+
+<<memrchr>> requires no supporting OS subroutines.
+
+QUICKREF
+ memrchr
+*/
+
+#include <sys/asm.h>
+#include <stddef.h>
+#include "rv_string.h"
+
+// Move size
+#if __riscv_zilsd
+#define MV_SZ 8
+
+// Offset is only 4 bytes for Zilsd/Zclsd since each register is 32 bits
+#define OFFSET 4
+#else
+#define MV_SZ SZREG
+#define OFFSET SZREG
+#endif
+
+
+void *
+memrchr (const void *src_void,
+ int c,
+ size_t length)
+{
+ const unsigned char *src = (const unsigned char *) src_void;
+ unsigned char d = c;
+
+ if (length) src += length - 1;
+
+#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+
+ /*
+ We add one to the address because, even if an address is already aligned,
+ loading a word reads the bytes preceding this address, so that single
+ byte is checked on its own.
+
+ If the address has all of the least significant bits set (equaling MV_SZ - 1),
+ and at least MV_SZ bytes remain, we can read a word starting from
+ src & ~(MV_SZ - 1), because no alignment is actually required.
+ */
+ size_t align = (uintptr_t) (src + 1) & (MV_SZ - 1);
+
+ if (align)
+ {
+ if (length < align) align = length;
+
+ switch (align)
+ {
+#if MV_SZ == 8
+ case 7:
+ if (*src-- == d) return (void *) (src + 1);
+ case 6:
+ if (*src-- == d) return (void *) (src + 1);
+ case 5:
+ if (*src-- == d) return (void *) (src + 1);
+ case 4:
+ if (*src-- == d) return (void *) (src + 1);
+#endif /* MV_SZ == 8 */
+ case 3:
+ if (*src-- == d) return (void *) (src + 1);
+ case 2:
+ if (*src-- == d) return (void *) (src + 1);
+ case 1:
+ if (*src-- == d) return (void *) (src + 1);
+ }
+
+ length -= align;
+ }
+
+ const unsigned char *end_addr = src - (length & ~(MV_SZ - 1));
+
+ if (src > end_addr)
+ {
+ src -= MV_SZ - 1;
+
+ uintxlen_t mask = __libc_splat_byte(d);
+
+ do
+ {
+ uintlslen_t val = *(uintlslen_t*) src;
+
+#if __riscv_zilsd
+ uintxlen_t word2 = val >> 32;
+ word2 ^= mask;
+
+ if (__libc_detect_null(word2))
+ {
+#if __riscv_zbb
+ src += OFFSET;
+ word2 = ~__LIBC_RISCV_ZBB_ORC_B(word2);
+ word2 = __LIBC_RISCV_ZBB_CNT_Z_REV(word2);
+
+ return (void *) (src + OFFSET - 1 - (word2 >> 3));
+#else /* not __riscv_zbb */
+ src += MV_SZ - 1;
+ if (*src-- == d) return (void *) (src + 1);
+ if (*src-- == d) return (void *) (src + 1);
+ if (*src-- == d) return (void *) (src + 1);
+ return (void *) src;
+#endif /* __riscv_zbb */
+ }
+#endif /* __riscv_zilsd */
+ uintxlen_t word1 = val ^ mask;
+
+ if (__libc_detect_null(word1))
+ {
+#if __riscv_zbb
+ word1 = ~__LIBC_RISCV_ZBB_ORC_B(word1);
+ word1 = __LIBC_RISCV_ZBB_CNT_Z_REV(word1);
+
+ return (void *) (src + OFFSET - 1 - (word1 >> 3));
+#else /* not __riscv_zbb */
+ src += OFFSET - 1;
+ if (*src-- == d) return (void *) (src + 1);
+ if (*src-- == d) return (void *) (src + 1);
+ if (*src-- == d) return (void *) (src + 1);
+#if __riscv_xlen == 64
+ if (*src-- == d) return (void *) (src + 1);
+ if (*src-- == d) return (void *) (src + 1);
+ if (*src-- == d) return (void *) (src + 1);
+ if (*src-- == d) return (void *) (src + 1);
+#endif /* __riscv_xlen == 64 */
+ return (void *) src;
+#endif /* __riscv_zbb */
+ }
+
+ src -= MV_SZ;
+ } while (src > end_addr);
+
+ length &= MV_SZ - 1;
+ src = end_addr;
+ }
+
+#endif /* not PREFER_SIZE_OVER_SPEED */
+
+ while (length--)
+ {
+ if (*src == d)
+ return (void *) src;
+ src--;
+ }
+
+ return NULL;
+}
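In plain C, the non-Zbb fallback above amounts to walking the bytes of a word from the most significant end once the word is known to contain the target byte; the sketch below assumes a 32-bit little-endian word and is illustrative, not the patch's code.

    #include <stdint.h>

    /* Return the highest byte offset (3..0) at which d occurs in word,
       or -1 if it does not occur. */
    static int last_byte_index (uint32_t word, unsigned char d)
    {
      for (int i = 3; i >= 0; i--)
        if (((word >> (8 * i)) & 0xff) == d)
          return i;
      return -1;
    }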
diff --git a/newlib/libc/machine/riscv/memset.S b/newlib/libc/machine/riscv/memset.S
index a717ae7..533f667 100644
--- a/newlib/libc/machine/riscv/memset.S
+++ b/newlib/libc/machine/riscv/memset.S
@@ -9,105 +9,296 @@
http://www.opensource.org/licenses.
*/
+#include <sys/asm.h>
+
+
+#define BYTE_TBL_SZ 31
+#define WORD_TBL_SZ 32
+
+#if __riscv_zilsd
+/* Move size */
+#define MV_SZ 8
+
+/* Store instruction */
+#define RG_ST sd
+
+/* Zilsd and Zclsd require an even numbered register */
+#define REG_SPLAT a4
+#else
+#define MV_SZ SZREG
+#define RG_ST REG_S
+#define REG_SPLAT a1
+#endif
+
+/*
+ Use an extended register for Zilsd and Zclsd if available
+ since a5 is used for the odd numbered register, in order
+ to eliminate an li instruction
+*/
+#if __riscv_zilsd && !__riscv_abi_rve
+#define REG_TABLE a6
+#else
+#define REG_TABLE a5
+#endif
+
+
.text
.global memset
-.type memset, @function
+.type memset, @function
+
+/* void *memset(void *s, int c, size_t n); */
+
+
memset:
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
- mv t1, a0
- beqz a2, 2f
+ mv a3, a0
+ beqz a2, .Ldone
-1:
- sb a1, 0(t1)
- add a2, a2, -1
- add t1, t1, 1
- bnez a2, 1b
+.Lset:
+ sb a1, 0(a3)
+ addi a2, a2, -1
+ addi a3, a3, 1
+ bnez a2, .Lset
-2:
+.Ldone:
ret
#else
- li t1, 15
- move a4, a0
- bleu a2, t1, .Ltiny
- and a5, a4, 15
- bnez a5, .Lmisaligned
+ li REG_TABLE, BYTE_TBL_SZ
+ mv a3, a0
+
+ /* If there aren't many bytes, copy them individually to reduce overhead */
+ bleu a2, REG_TABLE, .Lcopy_bytes
+
+ and a4, a3, MV_SZ - 1
+ beqz a4, .Lword_check
+
+ /*
+ Jump into the byte table depending on the number of bytes that need to be
+ written
+ */
+1:
+ auipc t0, %pcrel_hi(.Ltable_misaligned)
+
+ /*
+ Instructions in the tables are forced to be four bytes, so scale count
+ by 4
+ */
+#if __riscv_zba
+ sh2add t0, a4, t0
+#else
+ sll t1, a4, 2
+ add t0, t0, t1
+#endif
-.Laligned:
- bnez a1, .Lwordify
+ /* Save the return address because we aren't exiting the function yet */
+ mv t1, ra
+ jalr t0, %pcrel_lo(1b)
-.Lwordified:
- and a3, a2, ~15
- and a2, a2, 15
- add a3, a3, a4
+ /* Update pointer and count by what was written */
+ mv ra, t1
+ add a4, a4, -MV_SZ
+ add a2, a2, a4
+ sub a3, a3, a4
+ /* Access is now aligned. Check we can copy words. */
+ bleu a2, REG_TABLE, .Lcopy_bytes
+
+.Lword_check:
+ /* Don't need to splat special case of zero */
+ bnez a1, .Lsplat_byte
+#if __riscv_zilsd
+ mv REG_SPLAT, a1
+#endif
+ j .Lcopy_words_init
+
+/*
+ Align labels to four bytes after unconditional jumps to avoid any
+ penalties when jumping to 32-bit instructions that aren't 4-byte
+ aligned
+*/
+.p2align 2
+.Lsplat_byte:
+#if __riscv_zbkb
+ packh REG_SPLAT, a1, a1
#if __riscv_xlen == 64
-1:sd a1, 0(a4)
- sd a1, 8(a4)
+ packw REG_SPLAT, REG_SPLAT, REG_SPLAT
+#endif
+ pack REG_SPLAT, REG_SPLAT, REG_SPLAT
#else
-1:sw a1, 0(a4)
- sw a1, 4(a4)
- sw a1, 8(a4)
- sw a1, 12(a4)
+ and a1, a1, 0xFF
+ sll t0, a1, 8
+ or a1, a1, t0
+ sll t0, a1, 16
+ or REG_SPLAT, a1, t0
+#if __riscv_xlen == 64
+ sll t0, REG_SPLAT, 32
+ or REG_SPLAT, REG_SPLAT, t0
+#endif
#endif
- add a4, a4, 16
- bltu a4, a3, 1b
- bnez a2, .Ltiny
- ret
+.Lcopy_words_init:
+#if __riscv_zilsd
+ /* Odd register of even-odd pair */
+ mv a5, REG_SPLAT
+#endif
+
+ /* Calculate end address */
+ and t0, a2, ~(MV_SZ - 1)
+ add t1, a3, t0
+
+ /*
+ The idea behind the table of word stores: first, any remainder of bytes
+ that does not amount to an entire table length is stored by jumping
+ partway into the table. After that, complete runs of the entire table
+ are performed.
+ */
+ and t0, t0, (WORD_TBL_SZ - 1) * MV_SZ
+
+ /* Skip if there's no remainder */
+ beqz t0, .Ltable_bigly
+ neg t0, t0
+ add t0, t0, WORD_TBL_SZ * MV_SZ
+
+ /* Adjust start address with offset */
+ sub a3, a3, t0
+
+1:
+ auipc t2, %pcrel_hi(.Ltable_bigly)
+
+#if MV_SZ == 8
+ /*
+ If eight bytes are being copied with each store, we need to divide
+ the table offset in half
+ */
+ srl t0, t0, 1
+#endif
+
+ add t2, t2, t0
+ jr t2, %pcrel_lo(1b)
-.Ltiny:
- sub a3, t1, a2
- sll a3, a3, 2
-1:auipc t0, %pcrel_hi(.Ltable)
- add a3, a3, t0
+.p2align 2
+.Ltable_bigly:
+/*
+ Force the instructions to be four bytes to avoid an extra instruction
+ that would be needed to halve the offset for sw
+*/
.option push
.option norvc
-.Ltable_misaligned:
- jr a3, %pcrel_lo(1b)
-.Ltable:
- sb a1,14(a4)
- sb a1,13(a4)
- sb a1,12(a4)
- sb a1,11(a4)
- sb a1,10(a4)
- sb a1, 9(a4)
- sb a1, 8(a4)
- sb a1, 7(a4)
- sb a1, 6(a4)
- sb a1, 5(a4)
- sb a1, 4(a4)
- sb a1, 3(a4)
- sb a1, 2(a4)
- sb a1, 1(a4)
- sb a1, 0(a4)
+ RG_ST REG_SPLAT, MV_SZ*0(a3)
+ RG_ST REG_SPLAT, MV_SZ*1(a3)
+ RG_ST REG_SPLAT, MV_SZ*2(a3)
+ RG_ST REG_SPLAT, MV_SZ*3(a3)
+ RG_ST REG_SPLAT, MV_SZ*4(a3)
+ RG_ST REG_SPLAT, MV_SZ*5(a3)
+ RG_ST REG_SPLAT, MV_SZ*6(a3)
+ RG_ST REG_SPLAT, MV_SZ*7(a3)
+ RG_ST REG_SPLAT, MV_SZ*8(a3)
+ RG_ST REG_SPLAT, MV_SZ*9(a3)
+ RG_ST REG_SPLAT, MV_SZ*10(a3)
+ RG_ST REG_SPLAT, MV_SZ*11(a3)
+ RG_ST REG_SPLAT, MV_SZ*12(a3)
+ RG_ST REG_SPLAT, MV_SZ*13(a3)
+ RG_ST REG_SPLAT, MV_SZ*14(a3)
+ RG_ST REG_SPLAT, MV_SZ*15(a3)
+ RG_ST REG_SPLAT, MV_SZ*16(a3)
+ RG_ST REG_SPLAT, MV_SZ*17(a3)
+ RG_ST REG_SPLAT, MV_SZ*18(a3)
+ RG_ST REG_SPLAT, MV_SZ*19(a3)
+ RG_ST REG_SPLAT, MV_SZ*20(a3)
+ RG_ST REG_SPLAT, MV_SZ*21(a3)
+ RG_ST REG_SPLAT, MV_SZ*22(a3)
+ RG_ST REG_SPLAT, MV_SZ*23(a3)
+ RG_ST REG_SPLAT, MV_SZ*24(a3)
+ RG_ST REG_SPLAT, MV_SZ*25(a3)
+ RG_ST REG_SPLAT, MV_SZ*26(a3)
+ RG_ST REG_SPLAT, MV_SZ*27(a3)
+ RG_ST REG_SPLAT, MV_SZ*28(a3)
+ RG_ST REG_SPLAT, MV_SZ*29(a3)
+ RG_ST REG_SPLAT, MV_SZ*30(a3)
+ RG_ST REG_SPLAT, MV_SZ*31(a3)
.option pop
- ret
-.Lwordify:
- and a1, a1, 0xFF
- sll a3, a1, 8
- or a1, a1, a3
- sll a3, a1, 16
- or a1, a1, a3
-#if __riscv_xlen == 64
- sll a3, a1, 32
- or a1, a1, a3
+ /* Update the pointer and copy data if needed */
+ add a3, a3, MV_SZ * WORD_TBL_SZ
+ bltu a3, t1, .Ltable_bigly
+
+ /* Copy any remaining bytes */
+ and a2, a2, MV_SZ - 1
+ beqz a2, .Lexit
+
+#if __riscv_zilsd && __riscv_abi_rve
+ /* Restore table size if necessary */
+ li REG_TABLE, BYTE_TBL_SZ
#endif
- j .Lwordified
-
-.Lmisaligned:
- sll a3, a5, 2
-1:auipc t0, %pcrel_hi(.Ltable_misaligned)
- add a3, a3, t0
- mv t0, ra
- jalr a3, %pcrel_lo(1b)
- mv ra, t0
-
- add a5, a5, -16
- sub a4, a4, a5
- add a2, a2, a5
- bleu a2, t1, .Ltiny
- j .Laligned
+
+.Lcopy_bytes:
+ auipc t0, %pcrel_hi(.Ltable_tiny)
+
+ sub a2, REG_TABLE, a2
+
+ /*
+ Instructions in the tables are forced to be four bytes, so scale count
+ by 4
+ */
+#if __riscv_zba
+ sh2add t0, a2, t0
+#else
+ sll a2, a2, 2
+ add t0, t0, a2
+#endif
+
+ /* Don't save the return address because we're exiting after the jump */
+ jr t0, %pcrel_lo(.Lcopy_bytes)
+
+.p2align 2
+.Ltable_tiny:
+/*
+ norvc is needed because the immediate is only two bits in size for c.sb,
+ and without it the table would have a mix of 2- and 4-byte instructions
+ when Zcb is available
+*/
+.option push
+.option norvc
+ sb a1, 30(a3)
+ sb a1, 29(a3)
+ sb a1, 28(a3)
+ sb a1, 27(a3)
+ sb a1, 26(a3)
+ sb a1, 25(a3)
+ sb a1, 24(a3)
+ sb a1, 23(a3)
+ sb a1, 22(a3)
+ sb a1, 21(a3)
+ sb a1, 20(a3)
+ sb a1, 19(a3)
+ sb a1, 18(a3)
+ sb a1, 17(a3)
+ sb a1, 16(a3)
+ sb a1, 15(a3)
+ sb a1, 14(a3)
+ sb a1, 13(a3)
+ sb a1, 12(a3)
+ sb a1, 11(a3)
+ sb a1, 10(a3)
+ sb a1, 9(a3)
+ sb a1, 8(a3)
+#if MV_SZ == 8
+.Ltable_misaligned:
+#endif
+ sb a1, 7(a3)
+ sb a1, 6(a3)
+ sb a1, 5(a3)
+ sb a1, 4(a3)
+#if MV_SZ == 4
+.Ltable_misaligned:
+#endif
+ sb a1, 3(a3)
+ sb a1, 2(a3)
+ sb a1, 1(a3)
+ sb a1, 0(a3)
+.option pop
+.Lexit:
+ ret
#endif
- .size memset, .-memset
+.size memset, .-memset
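The byte table that memset jumps into behaves like a switch with fall-through; here is a C sketch of the idea for a tail of fewer than four bytes (the assembly instead indexes a table of sb instructions, four bytes per entry).

    #include <stddef.h>

    static void store_tail (unsigned char *p, unsigned char c, size_t n)
    {
      switch (n)            /* n is 0..3 in this sketch */
        {
        case 3: p[2] = c;   /* fall through */
        case 2: p[1] = c;   /* fall through */
        case 1: p[0] = c;   /* fall through */
        case 0: break;
        }
    }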
diff --git a/newlib/libc/machine/riscv/rv_string.h b/newlib/libc/machine/riscv/rv_string.h
index 362f66a..dc2a26d 100644
--- a/newlib/libc/machine/riscv/rv_string.h
+++ b/newlib/libc/machine/riscv/rv_string.h
@@ -20,20 +20,24 @@
// Determine which intrinsics to use based on XLEN and endianness
#if __riscv_xlen == 64
- #define __LIBC_RISCV_ZBB_ORC_B(x) __riscv_orc_b_64(x)
+ #define __LIBC_RISCV_ZBB_ORC_B(x) __riscv_orc_b_64(x)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- #define __LIBC_RISCV_ZBB_CNT_Z(x) __riscv_ctz_64(x)
+ #define __LIBC_RISCV_ZBB_CNT_Z(x) __riscv_ctz_64(x)
+ #define __LIBC_RISCV_ZBB_CNT_Z_REV(x) __riscv_clz_64(x)
#else
- #define __LIBC_RISCV_ZBB_CNT_Z(x) __riscv_clz_64(x)
+ #define __LIBC_RISCV_ZBB_CNT_Z(x) __riscv_clz_64(x)
+ #define __LIBC_RISCV_ZBB_CNT_Z_REV(x) __riscv_ctz_64(x)
#endif
#else
- #define __LIBC_RISCV_ZBB_ORC_B(x) __riscv_orc_b_32(x)
+ #define __LIBC_RISCV_ZBB_ORC_B(x) __riscv_orc_b_32(x)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- #define __LIBC_RISCV_ZBB_CNT_Z(x) __riscv_ctz_32(x)
+ #define __LIBC_RISCV_ZBB_CNT_Z(x) __riscv_ctz_32(x)
+ #define __LIBC_RISCV_ZBB_CNT_Z_REV(x) __riscv_clz_32(x)
#else
- #define __LIBC_RISCV_ZBB_CNT_Z(x) __riscv_clz_32(x)
+ #define __LIBC_RISCV_ZBB_CNT_Z(x) __riscv_clz_32(x)
+ #define __LIBC_RISCV_ZBB_CNT_Z_REV(x) __riscv_ctz_32(x)
#endif
#endif
#endif
@@ -82,8 +86,8 @@ static __inline char *__libc_strcpy(char *dst, const char *src, bool ret_start)
if (!(*dst++ = src[0])) return dst0;
if (!(*dst++ = src[1])) return dst0;
if (!(*dst++ = src[2])) return dst0;
- if (!(*dst++ = src[3])) return dst0;
#if __riscv_xlen == 64
+ if (!(*dst++ = src[3])) return dst0;
if (!(*dst++ = src[4])) return dst0;
if (!(*dst++ = src[5])) return dst0;
if (!(*dst++ = src[6])) return dst0;
@@ -94,13 +98,13 @@ static __inline char *__libc_strcpy(char *dst, const char *src, bool ret_start)
if (!(*dst++ = src[0])) return dst - 1;
if (!(*dst++ = src[1])) return dst - 1;
if (!(*dst++ = src[2])) return dst - 1;
- if (!(*dst++ = src[3])) return dst - 1;
#if __riscv_xlen == 64
+ if (!(*dst++ = src[3])) return dst - 1;
if (!(*dst++ = src[4])) return dst - 1;
if (!(*dst++ = src[5])) return dst - 1;
if (!(*dst++ = src[6])) return dst - 1;
- dst0 = dst;
#endif
+ dst0 = dst;
}
*dst = 0;
@@ -121,4 +125,33 @@ static __inline char *__libc_strcpy(char *dst, const char *src, bool ret_start)
}
+static __inline uintxlen_t __libc_splat_byte(unsigned char c)
+{
+ uintxlen_t val;
+
+#if __riscv_zbkb
+ asm volatile ("packh %0, %1, %1"
+ : "=r" (val)
+ : "r" (c)
+ );
+#if __riscv_xlen == 64
+ asm volatile ("packw %0, %0, %0"
+ : "+r" (val)
+ );
+#endif /* __riscv_xlen == 64 */
+ asm volatile ("pack %0, %0, %0"
+ : "+r" (val)
+ );
+#else /* not __riscv_zbkb */
+ val = (c << 8) | c;
+ val = (val << 16) | val;
+#if __riscv_xlen == 64
+ val = (val << 32) | val;
+#endif /* __riscv_xlen == 64 */
+#endif /* __riscv_zbkb */
+
+ return val;
+}
+
+
#endif /* _RV_STRING_H */
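Taken together with the existing __libc_detect_null helper, the new splat routine lets callers test a whole word for one byte value; a usage sketch, assuming both helpers and uintxlen_t are visible through rv_string.h.

    #include "rv_string.h"

    /* Nonzero if any byte of word equals c: XOR against the splatted
       pattern turns matching bytes into zero bytes. */
    static int word_contains_byte (uintxlen_t word, unsigned char c)
    {
      return __libc_detect_null (word ^ __libc_splat_byte (c)) != 0;
    }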
diff --git a/newlib/libc/machine/riscv/setjmp.S b/newlib/libc/machine/riscv/setjmp.S
index eef242e..f2b5053 100644
--- a/newlib/libc/machine/riscv/setjmp.S
+++ b/newlib/libc/machine/riscv/setjmp.S
@@ -16,21 +16,33 @@
.type setjmp, @function
setjmp:
REG_S ra, 0*SZREG(a0)
- REG_S s0, 1*SZREG(a0)
- REG_S s1, 2*SZREG(a0)
+ #if __riscv_xlen == 32 && (__riscv_zilsd) && (__riscv_misaligned_fast)
+ sd s0, 1*SZREG(a0)
+ #else
+ REG_S s0, 1*SZREG(a0)
+ REG_S s1, 2*SZREG(a0)
+ #endif
-#ifndef __riscv_32e
- REG_S s2, 3*SZREG(a0)
- REG_S s3, 4*SZREG(a0)
- REG_S s4, 5*SZREG(a0)
- REG_S s5, 6*SZREG(a0)
- REG_S s6, 7*SZREG(a0)
- REG_S s7, 8*SZREG(a0)
- REG_S s8, 9*SZREG(a0)
- REG_S s9, 10*SZREG(a0)
- REG_S s10,11*SZREG(a0)
- REG_S s11,12*SZREG(a0)
- REG_S sp, 13*SZREG(a0)
+#ifndef __riscv_abi_rve
+ #if __riscv_xlen == 32 && (__riscv_zilsd) && (__riscv_misaligned_fast)
+ sd s2, 3*SZREG(a0)
+ sd s4, 5*SZREG(a0)
+ sd s6, 7*SZREG(a0)
+ sd s8, 9*SZREG(a0)
+ sd s10,11*SZREG(a0)
+ #else
+ REG_S s2, 3*SZREG(a0)
+ REG_S s3, 4*SZREG(a0)
+ REG_S s4, 5*SZREG(a0)
+ REG_S s5, 6*SZREG(a0)
+ REG_S s6, 7*SZREG(a0)
+ REG_S s7, 8*SZREG(a0)
+ REG_S s8, 9*SZREG(a0)
+ REG_S s9, 10*SZREG(a0)
+ REG_S s10,11*SZREG(a0)
+ REG_S s11,12*SZREG(a0)
+ #endif
+ REG_S sp, 13*SZREG(a0)
#else
REG_S sp, 3*SZREG(a0)
#endif
@@ -59,19 +71,31 @@ setjmp:
.type longjmp, @function
longjmp:
REG_L ra, 0*SZREG(a0)
- REG_L s0, 1*SZREG(a0)
- REG_L s1, 2*SZREG(a0)
-#ifndef __riscv_32e
- REG_L s2, 3*SZREG(a0)
- REG_L s3, 4*SZREG(a0)
- REG_L s4, 5*SZREG(a0)
- REG_L s5, 6*SZREG(a0)
- REG_L s6, 7*SZREG(a0)
- REG_L s7, 8*SZREG(a0)
- REG_L s8, 9*SZREG(a0)
- REG_L s9, 10*SZREG(a0)
- REG_L s10,11*SZREG(a0)
- REG_L s11,12*SZREG(a0)
+ #if __riscv_xlen == 32 && (__riscv_zilsd) && (__riscv_misaligned_fast)
+ ld s0, 1*SZREG(a0)
+ #else
+ REG_L s0, 1*SZREG(a0)
+ REG_L s1, 2*SZREG(a0)
+ #endif
+#ifndef __riscv_abi_rve
+ #if __riscv_xlen == 32 && (__riscv_zilsd) && (__riscv_misaligned_fast)
+ ld s2, 3*SZREG(a0)
+ ld s4, 5*SZREG(a0)
+ ld s6, 7*SZREG(a0)
+ ld s8, 9*SZREG(a0)
+ ld s10,11*SZREG(a0)
+ #else
+ REG_L s2, 3*SZREG(a0)
+ REG_L s3, 4*SZREG(a0)
+ REG_L s4, 5*SZREG(a0)
+ REG_L s5, 6*SZREG(a0)
+ REG_L s6, 7*SZREG(a0)
+ REG_L s7, 8*SZREG(a0)
+ REG_L s8, 9*SZREG(a0)
+ REG_L s9, 10*SZREG(a0)
+ REG_L s10,11*SZREG(a0)
+ REG_L s11,12*SZREG(a0)
+ #endif
REG_L sp, 13*SZREG(a0)
#else
REG_L sp, 3*SZREG(a0)
diff --git a/newlib/libc/machine/riscv/strcmp.S b/newlib/libc/machine/riscv/strcmp.S
index cc29b7b..0b1dfc4 100644
--- a/newlib/libc/machine/riscv/strcmp.S
+++ b/newlib/libc/machine/riscv/strcmp.S
@@ -16,15 +16,15 @@
.type strcmp, @function
strcmp:
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
-1:
+.Lcompare:
lbu a2, 0(a0)
lbu a3, 0(a1)
- add a0, a0, 1
- add a1, a1, 1
- bne a2, a3, 2f
- bnez a2, 1b
+ addi a0, a0, 1
+ addi a1, a1, 1
+ bne a2, a3, .Lreturn_diff
+ bnez a2, .Lcompare
-2:
+.Lreturn_diff:
sub a0, a2, a3
ret
@@ -48,12 +48,16 @@ strcmp:
REG_L a2, \i*SZREG(a0)
REG_L a3, \i*SZREG(a1)
- and t0, a2, a5
- or t1, a2, a5
- add t0, t0, a5
- or t0, t0, t1
+ #if __riscv_zbb
+ orc.b a4, a2
+ #else
+ and a4, a2, a5
+ or t1, a2, a5
+ add a4, a4, a5
+ or a4, a4, t1
+ #endif
- bne t0, t2, .Lnull\i
+ bne a4, t2, .Lnull\i
.if \i+1-\n
bne a2, a3, .Lmismatch
.else
@@ -95,73 +99,109 @@ strcmp:
.Lmismatch:
# words don't match, but a2 has no null byte.
+ #if __riscv_zbb
+ xor a4, a2, a3 # find differing bits
+
+ # Check system endianness
+ # If little-endian, use Count Trailing Zeros (ctz)
+ # If big-endian, use Count Leading Zeros (clz)
+ # This helps identify the position of the first differing byte between a2 and a3.
+
+ # For example, in little-endian, least significant byte comes first.
+ # So trailing zeros help find which byte position differs.
+
+ # In big-endian, most significant byte comes first, so leading zeros are used.
+ # The position will then be used to extract the differing byte.
+
+ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ ctz a5, a4
+ #else
+ clz a5, a4
+ #endif
+
+ andi a5, a5, -8 # round the bit index down to the bit offset of the byte where the first difference occurs
+
+
+ # Shift a2 and a3 right by a5 bits to bring the target byte to the LSB, and isolate the byte of interest
+ srl a2, a2, a5
+ and a2, a2, 0xff
+
+ srl a3, a3, a5
+ and a3, a3, 0xff
+
+
+ sub a0, a2, a3 # Calculate and return the difference in the isolated bytes
+ ret
+
+ #else
+ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #if __riscv_xlen == 64
+ sll a4, a2, 48
+ sll a5, a3, 48
+ bne a4, a5, .Lmismatch_upper
+ sll a4, a2, 32
+ sll a5, a3, 32
+ bne a4, a5, .Lmismatch_upper
+ #endif
+ sll a4, a2, 16
+ sll a5, a3, 16
+ bne a4, a5, .Lmismatch_upper
+
+ srl a4, a2, 8*SZREG-16
+ srl a5, a3, 8*SZREG-16
+ sub a0, a4, a5
+ and a1, a0, 0xff
+ bnez a1, .Lfinal_upper_diff
+ ret
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-
-#if __riscv_xlen == 64
- sll a4, a2, 48
- sll a5, a3, 48
- bne a4, a5, .Lmismatch_upper
- sll a4, a2, 32
- sll a5, a3, 32
- bne a4, a5, .Lmismatch_upper
-#endif
- sll a4, a2, 16
- sll a5, a3, 16
- bne a4, a5, .Lmismatch_upper
-
- srl a4, a2, 8*SZREG-16
- srl a5, a3, 8*SZREG-16
- sub a0, a4, a5
- and a1, a0, 0xff
- bnez a1, 1f
- ret
-
-.Lmismatch_upper:
- srl a4, a4, 8*SZREG-16
- srl a5, a5, 8*SZREG-16
- sub a0, a4, a5
- and a1, a0, 0xff
- bnez a1, 1f
- ret
-
-1:and a4, a4, 0xff
- and a5, a5, 0xff
- sub a0, a4, a5
- ret
-
-#else
-
-#if __riscv_xlen == 64
- srl a4, a2, 48
- srl a5, a3, 48
- bne a4, a5, .Lmismatch_lower
- srl a4, a2, 32
- srl a5, a3, 32
- bne a4, a5, .Lmismatch_lower
-#endif
- srl a4, a2, 16
- srl a5, a3, 16
- bne a4, a5, .Lmismatch_lower
-
- srl a4, a2, 8
- srl a5, a3, 8
- bne a4, a5, 1f
- and a4, a2, 0xff
- and a5, a3, 0xff
-1:sub a0, a4, a5
- ret
-
-.Lmismatch_lower:
- srl a2, a4, 8
- srl a3, a5, 8
- bne a2, a3, 1f
- and a2, a4, 0xff
- and a3, a5, 0xff
-1:sub a0, a2, a3
- ret
-
-#endif
+ .Lmismatch_upper:
+ srl a4, a4, 8*SZREG-16
+ srl a5, a5, 8*SZREG-16
+ sub a0, a4, a5
+ and a1, a0, 0xff
+ bnez a1, .Lfinal_upper_diff
+ ret
+
+ .Lfinal_upper_diff:
+ and a4, a4, 0xff
+ and a5, a5, 0xff
+ sub a0, a4, a5
+ ret
+ #else
+ #if __riscv_xlen == 64
+ srl a4, a2, 48
+ srl a5, a3, 48
+ bne a4, a5, .Lmismatch_lower
+ srl a4, a2, 32
+ srl a5, a3, 32
+ bne a4, a5, .Lmismatch_lower
+ #endif
+ srl a4, a2, 16
+ srl a5, a3, 16
+ bne a4, a5, .Lmismatch_lower
+
+ srl a4, a2, 8
+ srl a5, a3, 8
+ bne a4, a5, .Lbyte_diff
+ and a4, a2, 0xff
+ and a5, a3, 0xff
+
+ .Lbyte_diff:
+ sub a0, a4, a5
+ ret
+
+ .Lmismatch_lower:
+ srl a2, a4, 8
+ srl a3, a5, 8
+ bne a2, a3, .Lfinal_lower_diff
+ and a2, a4, 0xff
+ and a3, a5, 0xff
+
+ .Lfinal_lower_diff:
+ sub a0, a2, a3
+ ret
+ #endif
+ #endif
.Lmisaligned:
# misaligned
@@ -169,10 +209,10 @@ strcmp:
lbu a3, 0(a1)
add a0, a0, 1
add a1, a1, 1
- bne a2, a3, 1f
+ bne a2, a3, .Lmisaligned_diff
bnez a2, .Lmisaligned
-1:
+.Lmisaligned_diff:
sub a0, a2, a3
ret
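The Zbb mismatch path commented above maps to the following C, where __builtin_ctz stands in for the ctz instruction and a little-endian 32-bit word is assumed.

    #include <stdint.h>

    /* Difference of the first differing byte of two words that are known
       to differ (and to contain no NUL before that difference). */
    static int first_diff (uint32_t a, uint32_t b)
    {
      uint32_t x = a ^ b;                  /* differing bits */
      int shift = __builtin_ctz (x) & ~7;  /* round down to a byte boundary */
      return (int) ((a >> shift) & 0xff) - (int) ((b >> shift) & 0xff);
    }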
diff --git a/newlib/libc/machine/riscv/strlen.c b/newlib/libc/machine/riscv/strlen.c
index 9bfd2a1..8ab5ce5 100644
--- a/newlib/libc/machine/riscv/strlen.c
+++ b/newlib/libc/machine/riscv/strlen.c
@@ -9,6 +9,7 @@
http://www.opensource.org/licenses.
*/
+#include <sys/types.h>
#include <string.h>
#include <stdint.h>
#include "rv_string.h"
@@ -38,7 +39,9 @@ size_t strlen(const char *str)
asm volatile ("" : "+r"(ps)); /* prevent "optimization" */
str = (const char *)ps;
- size_t ret = str - start, sp = sizeof (*ps);
+
+ size_t ret = str - start;
+ ssize_t sp = sizeof (*ps);
#if __riscv_zbb
psval = ~__LIBC_RISCV_ZBB_ORC_B(psval);
@@ -47,16 +50,16 @@ size_t strlen(const char *str)
return ret + (psval >> 3) - sp;
#else
char c0 = str[0 - sp], c1 = str[1 - sp], c2 = str[2 - sp], c3 = str[3 - sp];
- if (c0 == 0) return ret + 0 - sp;
- if (c1 == 0) return ret + 1 - sp;
- if (c2 == 0) return ret + 2 - sp;
- if (c3 == 0) return ret + 3 - sp;
+ if (c0 == 0) return ret + 0 - sp;
+ if (c1 == 0) return ret + 1 - sp;
+ if (c2 == 0) return ret + 2 - sp;
+ if (__riscv_xlen == 32 || c3 == 0) return ret + 3 - sp;
#if __riscv_xlen == 64
c0 = str[4 - sp], c1 = str[5 - sp], c2 = str[6 - sp];
- if (c0 == 0) return ret + 4 - sp;
- if (c1 == 0) return ret + 5 - sp;
- if (c2 == 0) return ret + 6 - sp;
+ if (c0 == 0) return ret + 4 - sp;
+ if (c1 == 0) return ret + 5 - sp;
+ if (c2 == 0) return ret + 6 - sp;
#endif
return ret + 7 - sp;
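For the non-Zbb tail above, the position of the terminating NUL inside the last word could also be computed with the classic zero-byte mask; this sketch assumes a 32-bit little-endian word (the patch itself checks the bytes individually, and the Zbb path uses orc.b plus a count of trailing zeros).

    #include <stdint.h>

    /* Byte index (0..3) of the first zero byte in w; only meaningful
       when w is known to contain one. */
    static int first_zero_byte (uint32_t w)
    {
      uint32_t zeros = (w - 0x01010101u) & ~w & 0x80808080u;
      return __builtin_ctz (zeros) >> 3;
    }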
diff --git a/newlib/libc/machine/riscv/xlenint.h b/newlib/libc/machine/riscv/xlenint.h
index 86363a8..2d444ff 100644
--- a/newlib/libc/machine/riscv/xlenint.h
+++ b/newlib/libc/machine/riscv/xlenint.h
@@ -11,4 +11,11 @@ typedef uint32_t uintxlen_t;
# error __riscv_xlen must equal 32 or 64
#endif
+/* Load/Store length */
+#if __riscv_zilsd
+typedef uint64_t uintlslen_t;
+#else
+typedef uintxlen_t uintlslen_t;
+#endif
+
#endif /* _XLENINT_H */