Diffstat (limited to 'newlib/libc/machine/arc/strcpy-700.S')
-rw-r--r-- | newlib/libc/machine/arc/strcpy-700.S | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/newlib/libc/machine/arc/strcpy-700.S b/newlib/libc/machine/arc/strcpy-700.S
new file mode 100644
index 0000000..e6f99dc
--- /dev/null
+++ b/newlib/libc/machine/arc/strcpy-700.S
@@ -0,0 +1,68 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+   This file is free software; you can redistribute it and/or modify
+   it under the same terms as newlib/libc/string/strcpy.c .  */
+
+#include "asm.h"
+
+#ifdef __ARC700__
+/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
+   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+   it 8 byte aligned.  Thus, we can do a little read-ahead, without
+   dereferencing a cache line that we should not touch.
+   Note that short and long instructions have been scheduled to avoid
+   branch stalls.
+   The beq_s to r3z could be made unaligned & long to avoid a stall
+   there, but it is not likely to be taken often, and it
+   would also be likely to cost an unaligned mispredict at the next call.  */
+
+ENTRY(strcpy)
+	or	r2,r0,r1
+	bmsk_s	r2,r2,1
+	brne.d	r2,0,charloop
+	mov_s	r10,r0
+	ld_s	r3,[r1,0]
+	mov	r8,0x01010101
+	bbit0.d	r1,2,loop_start
+	ror	r12,r8
+	sub	r2,r3,r8
+	bic_s	r2,r2,r3
+	tst_s	r2,r12
+	bne_l	r3z
+	mov_s	r4,r3
+	.balign 4
+loop:
+	ld.a	r3,[r1,4]
+	st.ab	r4,[r10,4]
+loop_start:
+	ld.a	r4,[r1,4]
+	sub	r2,r3,r8
+	bic_s	r2,r2,r3
+	tst_l	r2,r12
+	bne_l	r3z
+	st.ab	r3,[r10,4]
+	sub	r2,r4,r8
+	bic	r2,r2,r4
+	tst_l	r2,r12
+	beq_l	loop
+	mov_s	r3,r4
+#ifdef __LITTLE_ENDIAN__
+r3z:	bmsk.f	r1,r3,7
+	lsr_s	r3,r3,8
+#else
+r3z:	lsr.f	r1,r3,24
+	asl_s	r3,r3,8
+#endif
+	bne.d	r3z
+	stb.ab	r1,[r10,1]
+	j_s	[blink]
+
+	.balign	4
+charloop:
+	ldb.ab	r3,[r1,1]
+
+
+	brne.d	r3,0,charloop
+	stb.ab	r3,[r10,1]
+	j	[blink]
+ENDFUNC(strcpy)
+#endif /* __ARC700__ */
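
For reference, the word loop relies on the classic "zero byte in a word" test: with r8 = 0x01010101 and r12 = 0x80808080 (r8 rotated right by one bit), the sub/bic/tst sequence produces a non-zero result exactly when the loaded word contains a NUL byte. The C sketch below only illustrates that strategy; the names strcpy_sketch and has_zero_byte are invented for this note, it assumes a 32-bit target, it copies 4 rather than 8 bytes per iteration, and it omits the read-ahead and delay-slot scheduling the assembly performs.

    /* Illustrative sketch only -- not part of this commit.  */
    #include <stdint.h>
    #include <string.h>

    #define ONES  0x01010101u   /* r8 in the assembly                  */
    #define HIGHS 0x80808080u   /* r12, i.e. ONES rotated right by one */

    /* Non-zero iff some byte of WORD is zero; mirrors the
       "sub r2,r3,r8; bic r2,r2,r3; tst r2,r12" sequence.  */
    static inline uint32_t
    has_zero_byte (uint32_t word)
    {
      return (word - ONES) & ~word & HIGHS;
    }

    char *
    strcpy_sketch (char *dst, const char *src)
    {
      char *ret = dst;

      /* charloop: byte-at-a-time fallback when either pointer is
         not 4 byte aligned.  */
      if (((uintptr_t) dst | (uintptr_t) src) & 3)
        {
          while ((*dst++ = *src++) != '\0')
            ;
          return ret;
        }

      /* Word loop: copy 4 bytes at a time until a word containing
         a NUL byte is loaded.  */
      for (;;)
        {
          uint32_t w;
          memcpy (&w, src, 4);          /* aligned 4-byte load */
          if (has_zero_byte (w))
            break;                      /* finish in the byte tail */
          memcpy (dst, &w, 4);
          src += 4;
          dst += 4;
        }

      /* Byte tail: store the remaining bytes, including the NUL.  */
      while ((*dst++ = *src++) != '\0')
        ;
      return ret;
    }

Once the zero-byte test fires, the assembly's r3z tail peels the bytes out of the last loaded word one at a time, from the low end on little-endian and from the high end on big-endian targets, storing each byte up to and including the NUL before returning.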