author     H.J. Lu <hjl.tools@gmail.com>  2021-07-22 20:26:25 -0700
committer  H.J. Lu <hjl.tools@gmail.com>  2021-07-28 13:23:57 -0700
commit     91cc803d27bda34919717b496b53cf279e44a922 (patch)
tree       0a6d0b00a94015f1554b6de690a620600dc7513d /sysdeps/x86_64/multiarch
parent     c25c32165d8b7c506442fdc0304f7a3a223e1f42 (diff)
x86-64: Add Avoid_Short_Distance_REP_MOVSB
commit 3ec5d83d2a237d39e7fd6ef7a0bc8ac4c171a4a5
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Sat Jan 25 14:19:40 2020 -0800

    x86-64: Avoid rep movsb with short distance [BZ #27130]

introduced some regressions on Intel processors without Fast Short REP
MOV (FSRM).  Add Avoid_Short_Distance_REP_MOVSB to avoid rep movsb with
short distance only on Intel processors with FSRM.  bench-memmove-large
on Skylake server shows that cycles of __memmove_evex_unaligned_erms
improves for the following data size:

                                  before    after    Improvement
length=4127, align1=3, align2=0:   479.38   349.25      27%
length=4223, align1=9, align2=5:   405.62   333.25      18%
length=8223, align1=3, align2=0:   786.12   496.38      37%
length=8319, align1=9, align2=5:   727.50   501.38      31%
length=16415, align1=3, align2=0: 1436.88   840.00      41%
length=16511, align1=9, align2=5: 1375.50   836.38      39%
length=32799, align1=3, align2=0: 2890.00  1860.12      36%
length=32895, align1=9, align2=5: 2891.38  1931.88      33%
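The effect is easy to observe outside glibc's bench-memmove-large
harness.  The sketch below is a rough standalone timer, not taken from
the commit: it contrasts a memmove whose destination sits a few bytes
past the source (the short-distance case that triggers the false
dependency) with one whose destination is far away.  Buffer size,
length, distances, and iteration count are arbitrary choices for
illustration.

/* Rough standalone timer (an illustration, not glibc's harness).  */
#include <stdio.h>
#include <string.h>
#include <time.h>

static double
bench_ns (char *dst, char *src, size_t len, int iters)
{
  struct timespec t0, t1;
  clock_gettime (CLOCK_MONOTONIC, &t0);
  for (int i = 0; i < iters; i++)
    memmove (dst, src, len);
  clock_gettime (CLOCK_MONOTONIC, &t1);
  return ((t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec)) / iters;
}

int
main (void)
{
  static char buf[1 << 20];
  size_t len = 16415;		/* One of the lengths from the table above.  */
  int iters = 100000;
  memset (buf, 1, sizeof buf);
  /* Short distance: destination only 3 bytes past the source.  */
  printf ("distance 3:    %.1f ns/call\n",
	  bench_ns (buf + 3, buf, len, iters));
  /* Long distance: destination far from the source.  */
  printf ("distance 128K: %.1f ns/call\n",
	  bench_ns (buf + 128 * 1024, buf, len, iters));
  return 0;
}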
Diffstat (limited to 'sysdeps/x86_64/multiarch')
-rw-r--r--  sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S  5
1 file changed, 5 insertions, 0 deletions
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index a783da5..9f02624 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -325,12 +325,16 @@ L(movsb):
/* Avoid slow backward REP MOVSB. */
jb L(more_8x_vec_backward)
# if AVOID_SHORT_DISTANCE_REP_MOVSB
+ testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
+ jz 3f
movq %rdi, %rcx
subq %rsi, %rcx
jmp 2f
# endif
1:
# if AVOID_SHORT_DISTANCE_REP_MOVSB
+ testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
+ jz 3f
movq %rsi, %rcx
subq %rdi, %rcx
2:
@@ -338,6 +342,7 @@ L(movsb):
is N*4GB + [1..63] with N >= 0. */
cmpl $63, %ecx
jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */
+3:
# endif
mov %RDX_LP, %RCX_LP
rep movsb
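
For readers less fluent in assembly, here is a minimal C sketch of the
decision the patch adds.  The helper names and the standalone main are
hypothetical, not glibc source; __x86_string_control is modeled by a
plain variable, whereas in glibc it is initialized from CPU feature
detection.

/* Minimal C sketch of the patched control flow (hypothetical names).  */
#include <stdint.h>
#include <stdio.h>

#define AVOID_SHORT_DISTANCE_REP_MOVSB 0x1
static unsigned int x86_string_control = AVOID_SHORT_DISTANCE_REP_MOVSB;

static const char *
choose_copy_path (const char *dst, const char *src)
{
  /* testl: read the control word without writing it back.  */
  if (x86_string_control & AVOID_SHORT_DISTANCE_REP_MOVSB)
    {
      uint64_t distance = dst > src ? (uint64_t) (dst - src)
				    : (uint64_t) (src - dst);
      /* cmpl $63, %ecx compares only the low 32 bits, so any distance
	 of N*4GB + [1..63] with N >= 0 takes the vector fallback.  */
      if ((uint32_t) distance <= 63)
	return "vector loop (L(more_2x_vec))";
    }
  return "rep movsb";
}

int
main (void)
{
  static char buf[1 << 16];
  printf ("distance 3:    %s\n", choose_copy_path (buf + 3, buf));
  printf ("distance 4096: %s\n", choose_copy_path (buf + 4096, buf));
  return 0;
}

With the control bit clear (as on Intel processors without FSRM, or on
other vendors), the distance check is skipped entirely and the copy
goes straight to rep movsb, which is what restores the pre-BZ #27130
behavior on those parts.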