Diffstat (limited to 'newlib/libc/machine/sh/memcpy.S')
-rw-r--r-- | newlib/libc/machine/sh/memcpy.S | 221 |
1 file changed, 0 insertions, 221 deletions
diff --git a/newlib/libc/machine/sh/memcpy.S b/newlib/libc/machine/sh/memcpy.S
deleted file mode 100644
index 4df72e3..0000000
--- a/newlib/libc/machine/sh/memcpy.S
+++ /dev/null
@@ -1,221 +0,0 @@
-!
-! Fast SH memcpy
-!
-! by Toshiyasu Morita (tm@netcom.com)
-! hacked by J"orn Rennecke (amylaar@cygnus.co.uk) ("o for o-umlaut)
-!
-! Entry: r4: destination pointer
-!        r5: source pointer
-!        r6: byte count
-!
-! Exit:  r0: destination pointer
-!        r1-r7: trashed
-!
-! Notes: Usually one wants to do small reads and write a longword, but
-!        unfortunately it is difficult in some cases to concatenate bytes
-!        into a longword on the SH, so this does a longword read and small
-!        writes.
-!
-! This implementation makes two assumptions about how it is called:
-!
-! 1.: If the byte count is nonzero, the address of the last byte to be
-!     copied is unsigned greater than the address of the first byte to
-!     be copied.  This could be easily swapped for a signed comparison,
-!     but the algorithm used needs some comparison.
-!
-! 2.: When there are two or three bytes in the last word of an 11-or-more
-!     bytes memory chunk to be copied, the rest of the word can be read
-!     without side effects.
-!     This could be easily changed by increasing the minimum size of
-!     a fast memcpy and the amount subtracted from r7 before L_2l_loop by 2,
-!     however, this would cost a few extra cycles on average.
-!
-
-#include "asm.h"
-
-ENTRY(memcpy)
-#ifdef __LITTLE_ENDIAN__
-        ! Little endian version copies with increasing addresses.
-        mov r4,r3       ! Save return value
-        mov #11,r0      ! Check if small number of bytes
-        cmp/hs r0,r6
-                        ! r6 becomes src end address
-        SL(bf, L_small, add r5,r6)
-        mov #1,r1
-        tst r1,r5       ! check if source even
-        SL(bt, L_even, mov r6,r7)
-        mov.b @r5+,r0   ! no, make it even.
-        mov.b r0,@r4
-        add #1,r4
-L_even: tst r1,r4       ! check if destination is even
-        add #-3,r7
-        SL(bf, L_odddst, mov #2,r1)
-        tst r1,r4       ! check if destination is 4-byte aligned
-        mov r4,r0
-        SL(bt, L_al4dst, sub r5,r0)
-        mov.w @r5+,r2
-        mov.w r2,@r4
-        ! add #2,r4     r4 is dead here.
-L_al4dst:
-        tst r1,r5
-        bt L_al4both
-        mov.w @r5+,r1
-        swap.w r1,r1
-        add #-6,r0
-        add #-6,r7      ! r7 := src end address minus 9.
-        .align 2
-L_2l_loop:
-        mov.l @r5+,r2   ! Read & write two longwords per iteration
-        xtrct r2,r1
-        mov.l r1,@(r0,r5)
-        cmp/hs r7,r5
-        mov.l @r5+,r1
-        xtrct r1,r2
-        mov.l r2,@(r0,r5)
-        bf L_2l_loop
-        add #-2,r5
-        bra L_cleanup
-        add #5,r0
-L_al4both:
-        add #-4,r0
-        .align 2
-L_al4both_loop:
-        mov.l @r5+,r4   ! Read longword, write longword per iteration
-        cmp/hs r7,r5
-        SL(bf, L_al4both_loop, mov.l r4,@(r0,r5))
-
-        bra L_cleanup
-        add #3,r0
-
-L_odddst:
-        tst r1,r5
-        SL(bt, L_al4src, add #-1,r4)
-        mov.w @r5+,r0
-        mov.b r0,@(1,r4)
-        shlr8 r0
-        mov.b r0,@(2,r4)
-        add #2,r4
-L_al4src:
-        .align 2
-L_odd_loop:
-        mov.l @r5+,r0   ! Read longword, write byte, word, byte per iteration
-        cmp/hs r7,r5
-        mov.b r0,@(1,r4)
-        shlr8 r0
-        mov.w r0,@(2,r4)
-        shlr16 r0
-        mov.b r0,@(4,r4)
-        SL(bf, L_odd_loop, add #4,r4)
-        .align 2 ! avoid nop in more frequently executed code.
-L_cleanup2:
-        mov r4,r0
-        sub r5,r0
-L_cleanup:
-        cmp/eq r6,r5
-        bt L_ready
-        .align 2
-L_cleanup_loop:
-        mov.b @r5+,r1
-        cmp/eq r6,r5
-        mov.b r1,@(r0,r5)
-        bf L_cleanup_loop
-L_ready:
-        rts
-        mov r3,r0
-L_small:
-        bra L_cleanup2
-        add #-1,r4
-#else
-        ! Big endian version copies with decreasing addresses.
-        mov r4,r0
-        add r6,r0
-        sub r4,r5
-        mov #11,r1
-        cmp/hs r1,r6
-        SL(bf, L_small, add #-1,r5)
-        mov r5,r3
-        add r0,r3
-        shlr r3
-        SL(bt, L_even,
-         mov r4,r7)
-        mov.b @(r0,r5),r2
-        add #-1,r3
-        mov.b r2,@-r0
-L_even:
-        tst #1,r0
-        add #-1,r5
-        SL(bf, L_odddst, add #8,r7)
-        tst #2,r0
-        bt L_al4dst
-        add #-1,r3
-        mov.w @(r0,r5),r1
-        mov.w r1,@-r0
-L_al4dst:
-        shlr r3
-        bt L_al4both
-        mov.w @(r0,r5),r1
-        swap.w r1,r1
-        add #4,r7
-        add #-4,r5
-        .align 2
-L_2l_loop:
-        mov.l @(r0,r5),r2
-        xtrct r2,r1
-        mov.l r1,@-r0
-        cmp/hs r7,r0
-        mov.l @(r0,r5),r1
-        xtrct r1,r2
-        mov.l r2,@-r0
-        bt L_2l_loop
-        bra L_cleanup
-        add #5,r5
-
-        nop ! avoid nop in executed code.
-L_al4both:
-        add #-2,r5
-        .align 2
-L_al4both_loop:
-        mov.l @(r0,r5),r1
-        cmp/hs r7,r0
-        SL(bt, L_al4both_loop,
-         mov.l r1,@-r0)
-        bra L_cleanup
-        add #3,r5
-
-        nop ! avoid nop in executed code.
-L_odddst:
-        shlr r3
-        bt L_al4src
-        mov.w @(r0,r5),r1
-        mov.b r1,@-r0
-        shlr8 r1
-        mov.b r1,@-r0
-L_al4src:
-        add #-2,r5
-        .align 2
-L_odd_loop:
-        mov.l @(r0,r5),r2
-        cmp/hs r7,r0
-        mov.b r2,@-r0
-        shlr8 r2
-        mov.w r2,@-r0
-        shlr16 r2
-        mov.b r2,@-r0
-        bt L_odd_loop
-
-        add #3,r5
-L_cleanup:
-L_small:
-        cmp/eq r4,r0
-        bt L_ready
-        add #1,r4
-        .align 2
-L_cleanup_loop:
-        mov.b @(r0,r5),r2
-        cmp/eq r4,r0
-        mov.b r2,@-r0
-        bf L_cleanup_loop
-L_ready:
-        rts
-        nop
-#endif
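The header comment of the deleted file captures its key design decision: the SH can load a longword cheaply but has no convenient way to assemble one from unaligned bytes, so the inner loops (for example L_odd_loop) read whole longwords and split them into small stores. As a rough illustration of that strategy -- a hypothetical C sketch, not part of this change and not newlib's code; the function name and the little-endian byte order are my assumptions -- the odd-destination case looks roughly like this:

#include <stddef.h>
#include <stdint.h>

/* Hypothetical sketch of the L_odd_loop strategy (not the library's code):
 * destination odd-aligned, source longword-aligned, little-endian order.
 * Each iteration does one aligned 4-byte read (mov.l) and then splits it
 * into small stores (mov.b / mov.w / mov.b in the assembly). */
static void copy_longs_to_odd_dest(unsigned char *dst, const uint32_t *src,
                                   size_t nlongs)
{
    while (nlongs--) {
        uint32_t w = *src++;                 /* aligned longword read      */
        dst[0] = (unsigned char)w;           /* byte store at odd address  */
        dst[1] = (unsigned char)(w >> 8);    /* these two byte stores are  */
        dst[2] = (unsigned char)(w >> 16);   /* ...a single mov.w in asm   */
        dst[3] = (unsigned char)(w >> 24);   /* trailing byte store        */
        dst += 4;
    }
}

The remaining 0-3 tail bytes are handled by a separate cleanup loop in the assembly, which is why the comment about reading slightly past the last word matters.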
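When source and destination differ only in 2-byte (word) alignment, L_2l_loop instead keeps the leftover half of the previous source longword and merges it with the next one using xtrct, which computes Rn = (Rm << 16) | (Rn >> 16), so every destination store is still a full aligned longword. A hedged C model of that merge -- again an illustration under an assumed little-endian order, with hypothetical names -- is:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Model of the SH xtrct instruction: the middle 32 bits of the 64-bit
 * value rm:rn, i.e. (rm << 16) | (rn >> 16). */
static inline uint32_t xtrct(uint32_t rm, uint32_t rn)
{
    return (rm << 16) | (rn >> 16);
}

/* Hypothetical sketch (not the library's code): dst is 4-byte aligned,
 * src starts 2 bytes past a 4-byte boundary.  After one priming 16-bit
 * read, every further source read is an aligned longword, and xtrct()
 * stitches consecutive reads into the longwords that get stored. */
static void copy_word_misaligned(uint32_t *dst, const unsigned char *src,
                                 size_t nlongs)
{
    uint32_t prev, cur;
    uint16_t head;

    memcpy(&head, src, 2);           /* mov.w: first two source bytes      */
    src += 2;
    prev = (uint32_t)head << 16;     /* swap.w: park them in the top half  */

    while (nlongs--) {
        memcpy(&cur, src, 4);        /* mov.l: aligned longword read       */
        src += 4;
        *dst++ = xtrct(cur, prev);   /* merge halves of two source reads   */
        prev = cur;
    }
}

As in the assembly, the last two bytes held in the final read are not stored here; they are left for the cleanup loop.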