aboutsummaryrefslogtreecommitdiff
path: root/newlib/libc/machine/xtensa/memset.S
diff options
context:
space:
mode:
author: Alexey Lapshin <alexey.lapshin@espressif.com> 2023-08-17 02:05:53 +0400
committer: Jeff Johnston <jjohnstn@redhat.com> 2023-08-17 18:14:15 -0400
commit: 7ba103eb1a95703c7803e4ca48e6ba5c8e1f3d56 (patch)
tree: 36b6a578131e28363728a2ed685d66348c747d78 /newlib/libc/machine/xtensa/memset.S
parent: 65d34484e9097fd9036dd577028423355cb5f5bc (diff)
download: newlib-7ba103eb1a95703c7803e4ca48e6ba5c8e1f3d56.zip
newlib-7ba103eb1a95703c7803e4ca48e6ba5c8e1f3d56.tar.gz
newlib-7ba103eb1a95703c7803e4ca48e6ba5c8e1f3d56.tar.bz2
newlib: add Xtensa port
Diffstat (limited to 'newlib/libc/machine/xtensa/memset.S')
-rw-r--r-- newlib/libc/machine/xtensa/memset.S 193
1 files changed, 193 insertions, 0 deletions
diff --git a/newlib/libc/machine/xtensa/memset.S b/newlib/libc/machine/xtensa/memset.S
new file mode 100644
index 0000000..48b5829
--- /dev/null
+++ b/newlib/libc/machine/xtensa/memset.S
@@ -0,0 +1,193 @@
+/* ANSI C standard library function memset.
+
+ Copyright (c) 2001-2008 Tensilica Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "xtensa-asm.h"
+
+/* void *memset (void *dst, int c, size_t length)
+
+ The algorithm is as follows:
+
+ Create a word with c in all byte positions.
+
+ If the destination is aligned, set 16B chunks with a loop, and then
+ finish up with 8B, 4B, 2B, and 1B stores conditional on the length.
+
+ If the destination is unaligned, align it by conditionally
+ setting 1B and/or 2B and then go to aligned case.
+
+ This code tries to use fall-through branches for the common
+ case of an aligned destination (except for the branches to
+ the alignment labels). */
+
+
+/* Byte-by-byte set: store the low byte of a3 to a4 consecutive bytes starting at a5, then return. */
+
+ .text
+ .begin schedule
+ .align XCHAL_INST_FETCH_WIDTH
+ .literal_position
+__memset_aux:
+
+ /* Skip bytes to get proper alignment for three-byte loop */
+.skip XCHAL_INST_FETCH_WIDTH - 3 /* NOTE(review): presumably places the 3-byte loop instruction so the body at 1: starts on a fetch boundary -- confirm */
+
+.Lbyteset: // in: a3 = fill value, a4 = bytes left to set, a5 = next byte to write
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, 2f // repeat the body up to 2: a4 times; skip it when a4 == 0
+#else
+ beqz a4, 2f // nothing left to set
+ add a6, a5, a4 // a6 = ending address
+#endif
+1: s8i a3, a5, 0 // store one byte (low 8 bits of a3)
+#if XTENSA_ESP32_PSRAM_CACHE_FIX
+ memw // only assembled with the PSRAM cache fix; this file pairs it with the s8i/s16i stores
+#endif
+ addi a5, a5, 1
+#if !XCHAL_HAVE_LOOPS
+ bltu a5, a6, 1b // loop until a5 reaches the ending address
+#endif
+2: leaf_return // a2 is never written in this file, so it still holds dst (the return value)
+
+
+/* Destination is unaligned: store 1 and/or 2 leading bytes until a5 is word-aligned, then rejoin .Ldstaligned. */
+
+ .align 4
+
+.Ldst1mod2: // dst is only byte aligned
+
+ /* Do short sizes byte-by-byte. */
+ bltui a4, 8, .Lbyteset // unsigned compare: fewer than 8 bytes to set
+
+ /* Set 1 byte. */
+ s8i a3, a5, 0
+ addi a5, a5, 1 // a5 was odd, so it is now even
+ addi a4, a4, -1 // one byte fewer left to set
+#if XTENSA_ESP32_PSRAM_CACHE_FIX
+ memw
+#endif
+
+ /* Now retest if dst is aligned. */
+ _bbci.l a5, 1, .Ldstaligned // bit 1 clear: a5 is a multiple of 4; otherwise fall through
+
+.Ldst2mod4: // dst has 16-bit alignment
+
+ /* Do short sizes byte-by-byte. */
+ bltui a4, 8, .Lbyteset // can also trigger on fall-through from above (a4 was 8, now 7)
+
+ /* Set 2 bytes. */
+ s16i a3, a5, 0 // low 16 bits of a3 hold the fill byte twice
+ addi a5, a5, 2
+ addi a4, a4, -2
+#if XTENSA_ESP32_PSRAM_CACHE_FIX
+ memw
+#endif
+
+ /* dst is now aligned; return to main algorithm */
+ j .Ldstaligned
+
+
+ .align 4
+ .global memset
+ .type memset, @function
+memset:
+ leaf_entry sp, 16 // macro from xtensa-asm.h (not shown here)
+ /* a2 = dst, a3 = c, a4 = length */
+
+ /* Duplicate character into all bytes of word. */
+ extui a3, a3, 0, 8 // a3 = c & 0xff
+ slli a7, a3, 8
+ or a3, a3, a7 // fill byte now in bits 0-15
+ slli a7, a3, 16
+ or a3, a3, a7 // fill byte now in all four bytes of a3
+
+ mov a5, a2 // copy dst so that a2 is return value
+
+ /* Check if dst is unaligned. */
+ _bbsi.l a2, 0, .Ldst1mod2 // bit 0 set: odd address
+ _bbsi.l a2, 1, .Ldst2mod4 // bit 0 clear, bit 1 set: address is 2 mod 4
+.Ldstaligned: // a5 is word-aligned here; a4 = bytes still to set
+
+ /* Get number of loop iterations with 16B per iteration. */
+ srli a7, a4, 4 // a7 = a4 / 16
+
+#if XTENSA_ESP32_PSRAM_CACHE_FIX
+ // Skip the extra store below if there is less than one 16B iteration to do.
+ beqz a7, 2f
+ // This seems to work to prefetch the cache line (the loop below stores the same word again).
+ s32i a3, a5, 0
+ nop
+#endif
+
+ /* Destination is word-aligned. */
+#if XCHAL_HAVE_LOOPS
+ loopnez a7, 2f // repeat the body up to 2: a7 times; skip it when a7 == 0
+#else
+ beqz a7, 2f // no full 16B chunk to set
+ slli a6, a7, 4 // a6 = a7 * 16 = bytes covered by the loop
+ add a6, a6, a5 // a6 = end of last 16B chunk
+#endif
+ /* Set 16 bytes per iteration. */
+1: s32i a3, a5, 0
+ s32i a3, a5, 4
+ s32i a3, a5, 8
+ s32i a3, a5, 12
+ addi a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+ bltu a5, a6, 1b // loop until a5 reaches the end of the last 16B chunk
+#endif
+
+ /* Set any leftover pieces smaller than 16B; the loop leaves a4 unchanged, so its bits 3..0 select the 8B, 4B, 2B and 1B stores. */
+2: bbci.l a4, 3, 3f // bit 3 clear: no 8-byte piece
+
+ /* Set 8 bytes. */
+ s32i a3, a5, 0
+ s32i a3, a5, 4
+ addi a5, a5, 8
+
+3: bbci.l a4, 2, 4f // bit 2 clear: no 4-byte piece
+
+ /* Set 4 bytes. */
+ s32i a3, a5, 0
+ addi a5, a5, 4
+
+4: bbci.l a4, 1, 5f // bit 1 clear: no 2-byte piece
+
+ /* Set 2 bytes. */
+ s16i a3, a5, 0
+ addi a5, a5, 2
+#if XTENSA_ESP32_PSRAM_CACHE_FIX
+ memw
+#endif
+
+5: bbci.l a4, 0, 6f // bit 0 clear: no final byte
+
+ /* Set 1 byte. */
+ s8i a3, a5, 0
+#if XTENSA_ESP32_PSRAM_CACHE_FIX
+ memw
+#endif
+6: leaf_return // a2 was never modified, so it still holds dst (the return value)
+
+ .end schedule
+
+ .size memset, . - memset