diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2017-01-30 10:59:15 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2017-01-30 10:59:31 -0800 |
commit | 02b78ff749f0c88771713368dbb2a09b1979814f (patch) | |
tree | f65e7fc16ef78b3e4254dcf0dcdb1e98864c5576 | |
parent | f8bf15febcaf137bbec5a61101e88cd5a9d56ca8 (diff) | |
download | glibc-02b78ff749f0c88771713368dbb2a09b1979814f.zip glibc-02b78ff749f0c88771713368dbb2a09b1979814f.tar.gz glibc-02b78ff749f0c88771713368dbb2a09b1979814f.tar.bz2 |
Add VZEROUPPER to memset-vec-unaligned-erms.S [BZ #21081]
Since memset-vec-unaligned-erms.S has VDUP_TO_VEC0_AND_SET_RETURN at
function entry, memset optimized for AVX2 and AVX512 will always use
ymm/zmm registers. VZEROUPPER should be placed before ret in
L(stosb):
movq %rdx, %rcx
movzbl %sil, %eax
movq %rdi, %rdx
rep stosb
movq %rdx, %rax
ret
since it can be reached from
L(stosb_more_2x_vec):
cmpq $REP_STOSB_THRESHOLD, %rdx
ja L(stosb)
[BZ #21081]
* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
(L(stosb)): Add VZEROUPPER before ret.
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 2 |
2 files changed, 8 insertions, 0 deletions
@@ -1,3 +1,9 @@
+2017-01-30 H.J. Lu <hongjiu.lu@intel.com>
+
+ [BZ #21081]
+ * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+ (L(stosb)): Add VZEROUPPER before ret.
+
 2016-01-28 Carlos O'Donell <carlos@redhat.com>
 Alexey Makhalov <amakhalov@vmware.com>
 Florian Weimer <fweimer@redhat.com>
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index ff214f0..704eed9 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -110,6 +110,8 @@ ENTRY (__memset_erms)
 ENTRY (MEMSET_SYMBOL (__memset, erms))
 # endif
 L(stosb):
+ /* Issue vzeroupper before rep stosb. */
+ VZEROUPPER
 movq %rdx, %rcx
 movzbl %sil, %eax
 movq %rdi, %rdx