aboutsummaryrefslogtreecommitdiff
path: root/newlib/libc/machine
diff options
context:
space:
mode:
author Steve Ellcey <Steve.Ellcey@imgtec.com> 2015-11-02 13:30:19 -0800
committer Steve Ellcey <Steve.Ellcey@imgtec.com> 2015-11-02 13:30:19 -0800
commit 50455323229d54a19ffc0a6fa2e1f1f24898b9c4 (patch)
tree de3e50d2851f718cd1aed2b24928538aa8a5dd03 /newlib/libc/machine
parent 7321662bb0db3bb14e9f0214a663cf013330c248 (diff)
download newlib-50455323229d54a19ffc0a6fa2e1f1f24898b9c4.zip
newlib-50455323229d54a19ffc0a6fa2e1f1f24898b9c4.tar.gz
newlib-50455323229d54a19ffc0a6fa2e1f1f24898b9c4.tar.bz2
Improve performance of MIPS memcpy.
* libc/machine/mips/memcpy.S (memcpy): Add word copies for small aligned data.
Diffstat (limited to 'newlib/libc/machine')
-rw-r--r-- newlib/libc/machine/mips/memcpy.S 45
1 files changed, 43 insertions, 2 deletions
diff --git a/newlib/libc/machine/mips/memcpy.S b/newlib/libc/machine/mips/memcpy.S
index ed0edb4..3130f6e 100644
--- a/newlib/libc/machine/mips/memcpy.S
+++ b/newlib/libc/machine/mips/memcpy.S
@@ -311,7 +311,7 @@ L(memcpy):
* size, copy dst pointer to v0 for the return value.
*/
slti t2,a2,(2 * NSIZE)
- bne t2,zero,L(lastb)
+ bne t2,zero,L(lasts)
#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
move v0,zero
#else
@@ -562,7 +562,7 @@ L(chkw):
*/
L(chk1w):
andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */
- beq a2,t8,L(lastb)
+ beq a2,t8,L(lastw)
PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
@@ -574,6 +574,20 @@ L(wordCopy_loop):
bne a0,a3,L(wordCopy_loop)
C_ST REG3,UNIT(-1)(a0)
+/* If we have been copying double words, see if we can copy a single word
+ before doing byte copies. We can have, at most, one word to copy. */
+
+L(lastw):
+#ifdef USE_DOUBLE
+ andi t8,a2,3 /* a2 is the remainder past 4 byte chunks. */
+ beq t8,a2,L(lastb)
+ lw REG3,0(a1)
+ sw REG3,0(a0)
+ PTR_ADDIU a0,a0,4
+ PTR_ADDIU a1,a1,4
+ move a2,t8
+#endif
+
/* Copy the last 8 (or 16) bytes */
L(lastb):
blez a2,L(leave)
@@ -588,6 +602,33 @@ L(leave):
j ra
nop
+/* We jump here with a memcpy of less than 8 or 16 bytes, depending on
+ whether or not USE_DOUBLE is defined. Instead of just doing byte
+ copies, check the alignment and size and use lw/sw if possible.
+ Otherwise, do byte copies. */
+
+L(lasts):
+ andi t8,a2,3
+ beq t8,a2,L(lastb)
+
+ andi t9,a0,3
+ bne t9,zero,L(lastb)
+ andi t9,a1,3
+ bne t9,zero,L(lastb)
+
+ PTR_SUBU a3,a2,t8
+ PTR_ADDU a3,a0,a3
+
+L(wcopy_loop):
+ lw REG3,0(a1)
+ PTR_ADDIU a0,a0,4
+ PTR_ADDIU a1,a1,4
+ bne a0,a3,L(wcopy_loop)
+ sw REG3,-4(a0)
+
+ b L(lastb)
+ move a2,t8
+
#ifndef R6_CODE
/*
* UNALIGNED case, got here with a3 = "negu a0"