diff options
author | Alexey Lapshin <alexey.lapshin@espressif.com> | 2023-08-17 02:05:53 +0400 |
---|---|---|
committer | Jeff Johnston <jjohnstn@redhat.com> | 2023-08-17 18:14:15 -0400 |
commit | 7ba103eb1a95703c7803e4ca48e6ba5c8e1f3d56 (patch) | |
tree | 36b6a578131e28363728a2ed685d66348c747d78 /newlib/libc/machine/xtensa/memset.S | |
parent | 65d34484e9097fd9036dd577028423355cb5f5bc (diff) | |
download | newlib-7ba103eb1a95703c7803e4ca48e6ba5c8e1f3d56.zip newlib-7ba103eb1a95703c7803e4ca48e6ba5c8e1f3d56.tar.gz newlib-7ba103eb1a95703c7803e4ca48e6ba5c8e1f3d56.tar.bz2 |
newlib: add Xtensa port
Diffstat (limited to 'newlib/libc/machine/xtensa/memset.S')
-rw-r--r-- | newlib/libc/machine/xtensa/memset.S | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/newlib/libc/machine/xtensa/memset.S b/newlib/libc/machine/xtensa/memset.S new file mode 100644 index 0000000..48b5829 --- /dev/null +++ b/newlib/libc/machine/xtensa/memset.S @@ -0,0 +1,193 @@ +/* ANSI C standard library function memset. + + Copyright (c) 2001-2008 Tensilica Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "xtensa-asm.h" + +/* void *memset (void *dst, int c, size_t length) + + The algorithm is as follows: + + Create a word with c in all byte positions. + + If the destination is aligned, set 16B chunks with a loop, and then + finish up with 8B, 4B, 2B, and 1B stores conditional on the length. + + If the destination is unaligned, align it by conditionally + setting 1B and/or 2B and then go to aligned case. + + This code tries to use fall-through branches for the common + case of an aligned destination (except for the branches to + the alignment labels). */ + + +/* Byte-by-byte set. */ + + .text + .begin schedule + .align XCHAL_INST_FETCH_WIDTH + .literal_position +__memset_aux: + + /* Skip bytes to get proper alignment for three-byte loop */ +.skip XCHAL_INST_FETCH_WIDTH - 3 + +.Lbyteset: +#if XCHAL_HAVE_LOOPS + loopnez a4, 2f +#else + beqz a4, 2f + add a6, a5, a4 // a6 = ending address +#endif +1: s8i a3, a5, 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + addi a5, a5, 1 +#if !XCHAL_HAVE_LOOPS + bltu a5, a6, 1b +#endif +2: leaf_return + + +/* Destination is unaligned. */ + + .align 4 + +.Ldst1mod2: // dst is only byte aligned + + /* Do short sizes byte-by-byte. */ + bltui a4, 8, .Lbyteset + + /* Set 1 byte. */ + s8i a3, a5, 0 + addi a5, a5, 1 + addi a4, a4, -1 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + + /* Now retest if dst is aligned. */ + _bbci.l a5, 1, .Ldstaligned + +.Ldst2mod4: // dst has 16-bit alignment + + /* Do short sizes byte-by-byte. */ + bltui a4, 8, .Lbyteset + + /* Set 2 bytes. */ + s16i a3, a5, 0 + addi a5, a5, 2 + addi a4, a4, -2 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + + /* dst is now aligned; return to main algorithm */ + j .Ldstaligned + + + .align 4 + .global memset + .type memset, @function +memset: + leaf_entry sp, 16 + /* a2 = dst, a3 = c, a4 = length */ + + /* Duplicate character into all bytes of word. */ + extui a3, a3, 0, 8 + slli a7, a3, 8 + or a3, a3, a7 + slli a7, a3, 16 + or a3, a3, a7 + + mov a5, a2 // copy dst so that a2 is return value + + /* Check if dst is unaligned. */ + _bbsi.l a2, 0, .Ldst1mod2 + _bbsi.l a2, 1, .Ldst2mod4 +.Ldstaligned: + + /* Get number of loop iterations with 16B per iteration. */ + srli a7, a4, 4 + +#if XTENSA_ESP32_PSRAM_CACHE_FIX + //do not do this if we have less than one iteration to do + beqz a7, 2f + //this seems to work to prefetch the cache line + s32i a3, a5, 0 + nop +#endif + + /* Destination is word-aligned. */ +#if XCHAL_HAVE_LOOPS + loopnez a7, 2f +#else + beqz a7, 2f + slli a6, a7, 4 + add a6, a6, a5 // a6 = end of last 16B chunk +#endif + /* Set 16 bytes per iteration. */ +1: s32i a3, a5, 0 + s32i a3, a5, 4 + s32i a3, a5, 8 + s32i a3, a5, 12 + addi a5, a5, 16 +#if !XCHAL_HAVE_LOOPS + bltu a5, a6, 1b +#endif + + /* Set any leftover pieces smaller than 16B. */ +2: bbci.l a4, 3, 3f + + /* Set 8 bytes. */ + s32i a3, a5, 0 + s32i a3, a5, 4 + addi a5, a5, 8 + +3: bbci.l a4, 2, 4f + + /* Set 4 bytes. */ + s32i a3, a5, 0 + addi a5, a5, 4 + +4: bbci.l a4, 1, 5f + + /* Set 2 bytes. */ + s16i a3, a5, 0 + addi a5, a5, 2 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + +5: bbci.l a4, 0, 6f + + /* Set 1 byte. */ + s8i a3, a5, 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif +6: leaf_return + + .end schedule + + .size memset, . - memset |