aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sysdeps/x86_64/multiarch/memmove-ssse3.S14
1 files changed, 9 insertions, 5 deletions
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S
index 048d015..01008fd 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S
@@ -151,13 +151,10 @@ L(more_2x_vec):
loop. */
movups %xmm0, (%rdi)
-# ifdef SHARED_CACHE_SIZE_HALF
- cmp $SHARED_CACHE_SIZE_HALF, %RDX_LP
-# else
- cmp __x86_shared_cache_size_half(%rip), %rdx
-# endif
+ cmp __x86_shared_non_temporal_threshold(%rip), %rdx
ja L(large_memcpy)
+L(loop_fwd):
leaq -64(%rdi, %rdx), %r8
andq $-16, %rdi
movl $48, %edx
@@ -199,6 +196,13 @@ L(large_memcpy):
movups -64(%r9, %rdx), %xmm10
movups -80(%r9, %rdx), %xmm11
+ /* Check if src and dst overlap. If they do, use cacheable
+ writes to potentially gain positive interference between
+ the loads during the memmove. */
+ subq %rdi, %r9
+ cmpq %rdx, %r9
+ jb L(loop_fwd)
+
sall $5, %ecx
leal (%rcx, %rcx, 2), %r8d
leaq -96(%rdi, %rdx), %rcx