diff options
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | sysdeps/i386/i686/memcpy.S | 57 |
2 files changed, 49 insertions, 11 deletions
@@ -1,5 +1,8 @@
 2008-04-09  Ulrich Drepper  <drepper@redhat.com>
 
+	[BZ #4314]
+	* sysdeps/i386/i686/memcpy.S: Optimize copying of aligned buffers.
+
 	[BZ #5209]
 	* sysdeps/unix/sysv/syscalls.list: The times syscall doesn't return
 	an error value.
diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S
index 00e84ec..ff5c66e 100644
--- a/sysdeps/i386/i686/memcpy.S
+++ b/sysdeps/i386/i686/memcpy.S
@@ -1,7 +1,7 @@
 /* Copy memory block and return pointer to beginning of destination block
    For Intel 80x86, x>=6.
    This file is part of the GNU C Library.
-   Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2003, 2004, 2008 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -41,29 +41,64 @@ END (__memcpy_chk)
 ENTRY (BP_SYM (memcpy))
 	ENTER
 
-	movl	LEN(%esp), %ecx
 	movl	%edi, %eax
 	movl	DEST(%esp), %edi
 	movl	%esi, %edx
 	movl	SRC(%esp), %esi
-	CHECK_BOUNDS_BOTH_WIDE (%edi, DEST(%esp), %ecx)
-	CHECK_BOUNDS_BOTH_WIDE (%esi, SRC(%esp), %ecx)
 
+	movl	%edi, %ecx
+	xorl	%esi, %ecx
+	andl	$3, %ecx
+	movl	LEN(%esp), %ecx
 	cld
-	shrl	$1, %ecx
-	jnc	1f
+	jne	.Lunaligned
+
+	cmpl	$3, %ecx
+	jbe	.Lunaligned
+
+	testl	$3, %esi
+	je	1f
 	movsb
-1:	shrl	$1, %ecx
-	jnc	2f
-	movsw
-2:	rep
+	decl	%ecx
+	testl	$3, %esi
+	je	1f
+	movsb
+	decl	%ecx
+	testl	$3, %esi
+	je	1f
+	movsb
+	decl	%ecx
+1:	pushl	%eax
+	movl	%ecx, %eax
+	shrl	$2, %ecx
+	rep
 	movsl
-	movl	%eax, %edi
+	movl	%eax, %ecx
+	andl	$3, %ecx
+	rep
+	movsb
+	popl	%eax
+
+.Lend:	movl	%eax, %edi
 	movl	%edx, %esi
 	movl	DEST(%esp), %eax
 	RETURN_BOUNDED_POINTER (DEST(%esp))
 
 	LEAVE
 	RET_PTR
+
+	/* When we come here the pointers do not have the same
+	   alignment or the length is too short.  No need to optimize for
+	   aligned memory accesses.  */
+.Lunaligned:
+	shrl	$1, %ecx
+	jnc	1f
+	movsb
+1:	shrl	$1, %ecx
+	jnc	2f
+	movsw
+2:	rep
+	movsl
+	jmp	.Lend
 END (BP_SYM (memcpy))
 libc_hidden_builtin_def (memcpy)