diff options
author | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2023-01-31 15:46:08 -0300 |
---|---|---|
committer | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2023-02-06 16:19:35 -0300 |
commit | 30c1dfde313fa01e195e0759915f1356effdb7d6 (patch) | |
tree | 7a94f097603e6f23d86bc644ab1df577b3006cb6 /string | |
parent | 367c31b5d61164db97834917f5487094ebef2f58 (diff) | |
download | glibc-30c1dfde313fa01e195e0759915f1356effdb7d6.zip glibc-30c1dfde313fa01e195e0759915f1356effdb7d6.tar.gz glibc-30c1dfde313fa01e195e0759915f1356effdb7d6.tar.bz2 |
string: Improve generic stpcpy
It follows the strategy:
- Align the destination on a word boundary using byte operations.
- If the source is also word aligned, read a word at a time, check for
a null byte (using has_zero from string-fzb.h), and write the remaining
bytes once one is found.
- If the source is not word aligned, loop by aligning the source and
merging the results of two reads. As in the aligned case,
check for a null byte with has_zero, and write the remaining bytes if
one is found.
Checked on x86_64-linux-gnu, i686-linux-gnu, powerpc64-linux-gnu,
and powerpc-linux-gnu by removing the arch-specific assembly
implementation and disabling multi-arch (it covers both LE and BE
for 64 and 32 bits).
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Diffstat (limited to 'string')
-rw-r--r-- | string/stpcpy.c | 92 |
1 files changed, 86 insertions, 6 deletions
diff --git a/string/stpcpy.c b/string/stpcpy.c index 8df5065..dd0fef1 100644 --- a/string/stpcpy.c +++ b/string/stpcpy.c @@ -15,12 +15,12 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - #define NO_MEMPCPY_STPCPY_REDIRECT #include <string.h> +#include <stdint.h> +#include <memcopy.h> +#include <string-fzb.h> +#include <string-misc.h> #undef __stpcpy #undef stpcpy @@ -29,12 +29,92 @@ # define STPCPY __stpcpy #endif +static __always_inline char * +write_byte_from_word (op_t *dest, op_t word) +{ + char *d = (char *) dest; + for (size_t i = 0; i < OPSIZ; i++, ++d) + { + char c = extractbyte (word, i); + *d = c; + if (c == '\0') + break; + } + return d; +} + +static __always_inline char * +stpcpy_aligned_loop (op_t *restrict dst, const op_t *restrict src) +{ + op_t word; + while (1) + { + word = *src++; + if (has_zero (word)) + break; + *dst++ = word; + } + + return write_byte_from_word (dst, word); +} + +static __always_inline char * +stpcpy_unaligned_loop (op_t *restrict dst, const op_t *restrict src, + uintptr_t ofs) +{ + op_t w2a = *src++; + uintptr_t sh_1 = ofs * CHAR_BIT; + uintptr_t sh_2 = OPSIZ * CHAR_BIT - sh_1; + + op_t w2 = MERGE (w2a, sh_1, (op_t)-1, sh_2); + if (!has_zero (w2)) + { + op_t w2b; + + /* Unaligned loop. The invariant is that the string bytes held in W2A + do not contain end-of-string. Therefore it is safe (and necessary) + to read another word, W2B, and merge it with W2A before storing. */ + while (1) + { + w2b = *src++; + w2 = MERGE (w2a, sh_1, w2b, sh_2); + /* Check if the merged word W2 contains end-of-string. */ + if (has_zero (w2)) + goto out; + *dst++ = w2; + if (has_zero (w2b)) + break; + w2a = w2b; + } + + /* Align the final partial word of SRC, filling with zero. */ + w2 = MERGE (w2b, sh_1, 0, sh_2); + } + +out: + return write_byte_from_word (dst, w2); +} + + /* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. 
*/ char * STPCPY (char *dest, const char *src) { - size_t len = strlen (src); - return memcpy (dest, src, len + 1) + len; + /* Copy just a few bytes to make DEST aligned. */ + size_t len = (-(uintptr_t) dest) % OPSIZ; + for (; len != 0; len--, ++dest) + { + char c = *src++; + *dest = c; + if (c == '\0') + return dest; + } + + /* DEST is now aligned to op_t, SRC may or may not be. */ + uintptr_t ofs = (uintptr_t) src % OPSIZ; + return ofs == 0 ? stpcpy_aligned_loop ((op_t*) dest, (const op_t *) src) + : stpcpy_unaligned_loop ((op_t*) dest, + (const op_t *) (src - ofs) , ofs); } weak_alias (__stpcpy, stpcpy) libc_hidden_def (__stpcpy) |