aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog3
-rw-r--r--sysdeps/i386/i686/memcpy.S57
2 files changed, 49 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 76ec21b..38efd89 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
2008-04-09 Ulrich Drepper <drepper@redhat.com>
+ [BZ #4314]
+ * sysdeps/i386/i686/memcpy.S: Optimize copying of aligned buffers.
+
[BZ #5209]
* sysdeps/unix/sysv/syscalls.list: The times syscall doesn't return
an error value.
diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S
index 00e84ec..ff5c66e 100644
--- a/sysdeps/i386/i686/memcpy.S
+++ b/sysdeps/i386/i686/memcpy.S
@@ -1,7 +1,7 @@
/* Copy memory block and return pointer to beginning of destination block
For Intel 80x86, x>=6.
This file is part of the GNU C Library.
- Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2003, 2004, 2008 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
The GNU C Library is free software; you can redistribute it and/or
@@ -41,29 +41,64 @@ END (__memcpy_chk)
ENTRY (BP_SYM (memcpy))
ENTER
- movl LEN(%esp), %ecx
movl %edi, %eax
movl DEST(%esp), %edi
movl %esi, %edx
movl SRC(%esp), %esi
- CHECK_BOUNDS_BOTH_WIDE (%edi, DEST(%esp), %ecx)
- CHECK_BOUNDS_BOTH_WIDE (%esi, SRC(%esp), %ecx)
+ movl %edi, %ecx
+ xorl %esi, %ecx
+ andl $3, %ecx
+ movl LEN(%esp), %ecx
cld
- shrl $1, %ecx
- jnc 1f
+ jne .Lunaligned
+
+ cmpl $3, %ecx
+ jbe .Lunaligned
+
+ testl $3, %esi
+ je 1f
movsb
-1: shrl $1, %ecx
- jnc 2f
- movsw
-2: rep
+ decl %ecx
+ testl $3, %esi
+ je 1f
+ movsb
+ decl %ecx
+ testl $3, %esi
+ je 1f
+ movsb
+ decl %ecx
+1: pushl %eax
+ movl %ecx, %eax
+ shrl $2, %ecx
+ rep
movsl
- movl %eax, %edi
+ movl %eax, %ecx
+ andl $3, %ecx
+ rep
+ movsb
+ popl %eax
+
+.Lend: movl %eax, %edi
movl %edx, %esi
movl DEST(%esp), %eax
RETURN_BOUNDED_POINTER (DEST(%esp))
LEAVE
RET_PTR
+
+ /* When we come here the pointers do not have the same
+ alignment or the length is too short. No need to optimize for
+ aligned memory accesses. */
+.Lunaligned:
+ shrl $1, %ecx
+ jnc 1f
+ movsb
+1: shrl $1, %ecx
+ jnc 2f
+ movsw
+2: rep
+ movsl
+ jmp .Lend
END (BP_SYM (memcpy))
libc_hidden_builtin_def (memcpy)