aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2017-01-30 10:59:15 -0800
committer H.J. Lu <hjl.tools@gmail.com> 2017-01-30 10:59:31 -0800
commit 02b78ff749f0c88771713368dbb2a09b1979814f (patch)
tree f65e7fc16ef78b3e4254dcf0dcdb1e98864c5576
parent f8bf15febcaf137bbec5a61101e88cd5a9d56ca8 (diff)
download glibc-02b78ff749f0c88771713368dbb2a09b1979814f.zip
glibc-02b78ff749f0c88771713368dbb2a09b1979814f.tar.gz
glibc-02b78ff749f0c88771713368dbb2a09b1979814f.tar.bz2
Add VZEROUPPER to memset-vec-unaligned-erms.S [BZ #21081]
Since memset-vec-unaligned-erms.S has VDUP_TO_VEC0_AND_SET_RETURN at function entry, memset optimized for AVX2 and AVX512 will always use ymm/zmm registers. VZEROUPPER should be placed before ret in

L(stosb):
	movq	%rdx, %rcx
	movzbl	%sil, %eax
	movq	%rdi, %rdx
	rep stosb
	movq	%rdx, %rax
	ret

since it can be reached from

L(stosb_more_2x_vec):
	cmpq	$REP_STOSB_THRESHOLD, %rdx
	ja	L(stosb)

	[BZ #21081]
	* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
	(L(stosb)): Add VZEROUPPER before ret.
-rw-r--r-- ChangeLog 6
-rw-r--r-- sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S 2
2 files changed, 8 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 4e0d788..a7f0434 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-01-30 H.J. Lu <hongjiu.lu@intel.com>
+
+ [BZ #21081]
+ * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+ (L(stosb)): Add VZEROUPPER before ret.
+
2016-01-28 Carlos O'Donell <carlos@redhat.com>
Alexey Makhalov <amakhalov@vmware.com>
Florian Weimer <fweimer@redhat.com>
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index ff214f0..704eed9 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -110,6 +110,8 @@ ENTRY (__memset_erms)
ENTRY (MEMSET_SYMBOL (__memset, erms))
# endif
L(stosb):
+ /* Issue vzeroupper before rep stosb. */
+ VZEROUPPER
movq %rdx, %rcx
movzbl %sil, %eax
movq %rdi, %rdx